[llvm] 7d75772 - [LoopVectorize] Convert some tests to opaque pointers (NFC)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 14 06:17:39 PST 2022


Author: Nikita Popov
Date: 2022-12-14T15:16:59+01:00
New Revision: 7d7577256b76e4293f455b8093504d5f7044ab4b

URL: https://github.com/llvm/llvm-project/commit/7d7577256b76e4293f455b8093504d5f7044ab4b
DIFF: https://github.com/llvm/llvm-project/commit/7d7577256b76e4293f455b8093504d5f7044ab4b.diff

LOG: [LoopVectorize] Convert some tests to opaque pointers (NFC)
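
The change is mechanical: typed pointer types such as i32* in the tests become the
untyped ptr type, and bitcast instructions that only changed the pointee type are
dropped. As a rough illustration of the pattern applied throughout these tests (a
minimal sketch with a hypothetical @example function, not taken from any single
test below):

    ; Illustrative example only -- before: typed pointers
    define void @example(i32* %A) {
      %gep = getelementptr inbounds i32, i32* %A, i64 1
      %val = load i32, i32* %gep, align 4
      ret void
    }

    ; Illustrative example only -- after: opaque pointers
    define void @example(ptr %A) {
      %gep = getelementptr inbounds i32, ptr %A, i64 1
      %val = load i32, ptr %gep, align 4
      ret void
    }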

Added: 
    

Modified: 
    llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
    llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
    llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll
    llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
    llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
    llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll
    llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
    llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
    llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll
    llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll
    llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll
    llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
    llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll
    llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
    llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
    llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll
    llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
    llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
    llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll
    llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
    llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
    llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll
    llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll
    llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll
    llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll
    llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll
    llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
    llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll
    llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
    llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
    llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll
    llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
    llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
    llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll
    llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll
    llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
    llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll
    llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll
    llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
    llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll
    llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll
    llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
    llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll
    llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
    llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-predstorecost.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
    llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
    llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll
    llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll
    llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll
    llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll
    llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reductions-allowed.ll
    llvm/test/Transforms/LoopVectorize/ARM/width-detect.ll
    llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll
    llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll
    llvm/test/Transforms/LoopVectorize/Hexagon/minimum-vf.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/interleave_IC.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/massv-altivec.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/massv-nobuiltin.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/massv-unsupported.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization-profitability.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/pr41179.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll
    llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll
    llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
    llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
    llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll
    llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll
    llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll
    llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-1.ll
    llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
    llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-02.ll
    llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll
    llvm/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll
    llvm/test/Transforms/LoopVectorize/VE/disable_lv.ll
    llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll
    llvm/test/Transforms/LoopVectorize/X86/avx1.ll
    llvm/test/Transforms/LoopVectorize/X86/avx512.ll
    llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll
    llvm/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
    llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
    llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
    llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll
    llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
    llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
    llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
    llvm/test/Transforms/LoopVectorize/X86/funclet.ll
    llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll
    llvm/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll
    llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll
    llvm/test/Transforms/LoopVectorize/X86/int128_no_gather.ll
    llvm/test/Transforms/LoopVectorize/X86/interleave_short_tc.ll
    llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-large-gap.ll
    llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-waw-dependency.ll
    llvm/test/Transforms/LoopVectorize/X86/interleaving.ll
    llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
    llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll
    llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
    llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll
    llvm/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
    llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
    llvm/test/Transforms/LoopVectorize/X86/no-vector.ll
    llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll
    llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll
    llvm/test/Transforms/LoopVectorize/X86/nontemporal.ll
    llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
    llvm/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
    llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll
    llvm/test/Transforms/LoopVectorize/X86/powof2div.ll
    llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
    llvm/test/Transforms/LoopVectorize/X86/pr39160.ll
    llvm/test/Transforms/LoopVectorize/X86/pr42674.ll
    llvm/test/Transforms/LoopVectorize/X86/pr54413-select-interleave-count-loop-with-cost-zero.ll
    llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll
    llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll
    llvm/test/Transforms/LoopVectorize/X86/redundant-vf2-cost.ll
    llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
    llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll
    llvm/test/Transforms/LoopVectorize/X86/register-assumption.ll
    llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll
    llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll
    llvm/test/Transforms/LoopVectorize/X86/struct-store.ll
    llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
    llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
    llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll
    llvm/test/Transforms/LoopVectorize/X86/tripcount.ll
    llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
    llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll
    llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
    llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
    llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll
    llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
    llvm/test/Transforms/LoopVectorize/X86/unroll_selection.ll
    llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll
    llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
    llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
    llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
    llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
    llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
    llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
    llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
    llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
    llvm/test/Transforms/LoopVectorize/X86/vectorize-only-for-real.ll
    llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
    llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
    llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll
    llvm/test/Transforms/LoopVectorize/X86/x86_fp80-interleaved-access.ll
    llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
    llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll
    llvm/test/Transforms/LoopVectorize/align.ll
    llvm/test/Transforms/LoopVectorize/assume.ll
    llvm/test/Transforms/LoopVectorize/bsd_regex.ll
    llvm/test/Transforms/LoopVectorize/calloc.ll
    llvm/test/Transforms/LoopVectorize/cast-induction.ll
    llvm/test/Transforms/LoopVectorize/check-prof-info.ll
    llvm/test/Transforms/LoopVectorize/conditional-assignment.ll
    llvm/test/Transforms/LoopVectorize/control-flow.ll
    llvm/test/Transforms/LoopVectorize/cpp-new-array.ll
    llvm/test/Transforms/LoopVectorize/dbg.value.ll
    llvm/test/Transforms/LoopVectorize/dead_instructions.ll
    llvm/test/Transforms/LoopVectorize/debugloc.ll
    llvm/test/Transforms/LoopVectorize/demanded-bits-of-pointer-instruction.ll
    llvm/test/Transforms/LoopVectorize/diag-missing-instr-debug-loc.ll
    llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll
    llvm/test/Transforms/LoopVectorize/diag-with-hotness-info.ll
    llvm/test/Transforms/LoopVectorize/disable_nonforced.ll
    llvm/test/Transforms/LoopVectorize/disable_nonforced_enable.ll
    llvm/test/Transforms/LoopVectorize/discriminator.ll
    llvm/test/Transforms/LoopVectorize/exact.ll
    llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll
    llvm/test/Transforms/LoopVectorize/explicit_outer_nonuniform_inner.ll
    llvm/test/Transforms/LoopVectorize/explicit_outer_uniform_diverg_branch.ll
    llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll
    llvm/test/Transforms/LoopVectorize/fix-reduction-dbg.ll
    llvm/test/Transforms/LoopVectorize/flags.ll
    llvm/test/Transforms/LoopVectorize/float-induction.ll
    llvm/test/Transforms/LoopVectorize/float-reduction.ll
    llvm/test/Transforms/LoopVectorize/fneg.ll
    llvm/test/Transforms/LoopVectorize/followup.ll
    llvm/test/Transforms/LoopVectorize/forked-pointers.ll
    llvm/test/Transforms/LoopVectorize/funcall.ll
    llvm/test/Transforms/LoopVectorize/gcc-examples.ll
    llvm/test/Transforms/LoopVectorize/gep_with_bitcast.ll
    llvm/test/Transforms/LoopVectorize/hints-trans.ll
    llvm/test/Transforms/LoopVectorize/hoist-loads.ll
    llvm/test/Transforms/LoopVectorize/i8-induction.ll
    llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
    llvm/test/Transforms/LoopVectorize/if-conv-crash.ll
    llvm/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll
    llvm/test/Transforms/LoopVectorize/if-conversion-reduction.ll
    llvm/test/Transforms/LoopVectorize/if-conversion.ll
    llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll
    llvm/test/Transforms/LoopVectorize/if-reduction.ll
    llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll
    llvm/test/Transforms/LoopVectorize/increment.ll
    llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll
    llvm/test/Transforms/LoopVectorize/induction-unroll-novec.ll
    llvm/test/Transforms/LoopVectorize/infiniteloop.ll
    llvm/test/Transforms/LoopVectorize/int_sideeffect.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses-1.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses-2.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses-alias.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses-masked-group.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll
    llvm/test/Transforms/LoopVectorize/interleaved-acess-with-remarks.ll
    llvm/test/Transforms/LoopVectorize/intrinsic.ll
    llvm/test/Transforms/LoopVectorize/irregular_type.ll
    llvm/test/Transforms/LoopVectorize/libcall-remark.ll
    llvm/test/Transforms/LoopVectorize/lifetime.ll
    llvm/test/Transforms/LoopVectorize/loop-scalars.ll
    llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll
    llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
    llvm/test/Transforms/LoopVectorize/memdep.ll
    llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll
    llvm/test/Transforms/LoopVectorize/metadata-unroll.ll
    llvm/test/Transforms/LoopVectorize/metadata-width.ll
    llvm/test/Transforms/LoopVectorize/metadata.ll
    llvm/test/Transforms/LoopVectorize/middle-block-dbg.ll
    llvm/test/Transforms/LoopVectorize/miniters.ll
    llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
    llvm/test/Transforms/LoopVectorize/mixed-precision-remarks.ll
    llvm/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
    llvm/test/Transforms/LoopVectorize/multiple-exits-versioning.ll
    llvm/test/Transforms/LoopVectorize/no_array_bounds.ll
    llvm/test/Transforms/LoopVectorize/no_array_bounds_scalable.ll
    llvm/test/Transforms/LoopVectorize/no_idiv_reduction.ll
    llvm/test/Transforms/LoopVectorize/no_int_induction.ll
    llvm/test/Transforms/LoopVectorize/no_switch.ll
    llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll
    llvm/test/Transforms/LoopVectorize/noalias-md-licm.ll
    llvm/test/Transforms/LoopVectorize/noalias-md.ll
    llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll
    llvm/test/Transforms/LoopVectorize/nofloat-report.ll
    llvm/test/Transforms/LoopVectorize/nofloat.ll
    llvm/test/Transforms/LoopVectorize/non-const-n.ll
    llvm/test/Transforms/LoopVectorize/nontemporal.ll
    llvm/test/Transforms/LoopVectorize/nounroll.ll
    llvm/test/Transforms/LoopVectorize/novect-lcssa-cfg-invalidation.ll
    llvm/test/Transforms/LoopVectorize/nuw.ll
    llvm/test/Transforms/LoopVectorize/opt.ll
    llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll
    llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
    llvm/test/Transforms/LoopVectorize/optsize.ll
    llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll
    llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll
    llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
    llvm/test/Transforms/LoopVectorize/partial-lcssa.ll
    llvm/test/Transforms/LoopVectorize/phi-cost.ll
    llvm/test/Transforms/LoopVectorize/pr25281.ll
    llvm/test/Transforms/LoopVectorize/pr28541.ll
    llvm/test/Transforms/LoopVectorize/pr30806-phi-scev.ll
    llvm/test/Transforms/LoopVectorize/pr31098.ll
    llvm/test/Transforms/LoopVectorize/pr31190.ll
    llvm/test/Transforms/LoopVectorize/pr33706.ll
    llvm/test/Transforms/LoopVectorize/pr34681.ll
    llvm/test/Transforms/LoopVectorize/pr36311.ll
    llvm/test/Transforms/LoopVectorize/pr37248.ll
    llvm/test/Transforms/LoopVectorize/pr39099.ll
    llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
    llvm/test/Transforms/LoopVectorize/pr45525.ll
    llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
    llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
    llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll
    llvm/test/Transforms/LoopVectorize/pr48832.ll
    llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
    llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
    llvm/test/Transforms/LoopVectorize/ptr-induction.ll
    llvm/test/Transforms/LoopVectorize/ptr_loops.ll
    llvm/test/Transforms/LoopVectorize/read-only.ll
    llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
    llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
    llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
    llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
    llvm/test/Transforms/LoopVectorize/reduction-order.ll
    llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
    llvm/test/Transforms/LoopVectorize/reduction-ptr.ll
    llvm/test/Transforms/LoopVectorize/reduction.ll
    llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll
    llvm/test/Transforms/LoopVectorize/remove_metadata.ll
    llvm/test/Transforms/LoopVectorize/reverse_induction.ll
    llvm/test/Transforms/LoopVectorize/reverse_iter.ll
    llvm/test/Transforms/LoopVectorize/runtime-check-address-space.ll
    llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll
    llvm/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
    llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll
    llvm/test/Transforms/LoopVectorize/runtime-drop-crash.ll
    llvm/test/Transforms/LoopVectorize/safegep.ll
    llvm/test/Transforms/LoopVectorize/same-base-access.ll
    llvm/test/Transforms/LoopVectorize/scalable-assume.ll
    llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
    llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
    llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
    llvm/test/Transforms/LoopVectorize/scalable-noalias-scope-decl.ll
    llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
    llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
    llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll
    llvm/test/Transforms/LoopVectorize/scalar-select.ll
    llvm/test/Transforms/LoopVectorize/scev-during-mutation.ll
    llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll
    llvm/test/Transforms/LoopVectorize/select-cmp.ll
    llvm/test/Transforms/LoopVectorize/simple-unroll.ll
    llvm/test/Transforms/LoopVectorize/skip-iterations.ll
    llvm/test/Transforms/LoopVectorize/small-loop.ll
    llvm/test/Transforms/LoopVectorize/start-non-zero.ll
    llvm/test/Transforms/LoopVectorize/store-shuffle-bug.ll
    llvm/test/Transforms/LoopVectorize/struct_access.ll
    llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll
    llvm/test/Transforms/LoopVectorize/tripcount.ll
    llvm/test/Transforms/LoopVectorize/trunc-reductions.ll
    llvm/test/Transforms/LoopVectorize/undef-inst-bug.ll
    llvm/test/Transforms/LoopVectorize/unroll-novec-memcheck-metadata.ll
    llvm/test/Transforms/LoopVectorize/unroll.ll
    llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll
    llvm/test/Transforms/LoopVectorize/unroll_novec.ll
    llvm/test/Transforms/LoopVectorize/unsafe-dep-remark.ll
    llvm/test/Transforms/LoopVectorize/unsafe-vf-hint-remark.ll
    llvm/test/Transforms/LoopVectorize/value-ptr-bug.ll
    llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
    llvm/test/Transforms/LoopVectorize/vect.stats.ll
    llvm/test/Transforms/LoopVectorize/vector-geps.ll
    llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll
    llvm/test/Transforms/LoopVectorize/vectorizeVFone.ll
    llvm/test/Transforms/LoopVectorize/version-mem-access.ll
    llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
    llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
    llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
    llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll
    llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll
    llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
    llvm/test/Transforms/LoopVectorize/write-only.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
index e1b922f6d5e50..279d4e82ccb85 100644
--- a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
+++ b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
@@ -6,15 +6,15 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK: icmp eq <4 x i32>
 ;CHECK: select <4 x i1>
 ;CHECK: ret i32
-define i32 @foo(i32 %x, i32 %t, i32* nocapture %A) nounwind uwtable ssp {
+define i32 @foo(i32 %x, i32 %t, ptr nocapture %A) nounwind uwtable ssp {
 entry:
   %cmp10 = icmp sgt i32 %x, 0
   br i1 %cmp10, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %if.end
   %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.end, label %if.then
 
@@ -28,7 +28,7 @@ if.then:                                          ; preds = %for.body
 
 if.end:                                           ; preds = %for.body, %if.then
   %z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ]
-  store i32 %z.0, i32* %arrayidx, align 4
+  store i32 %z.0, ptr %arrayidx, align 4
   %indvars.iv.next = add nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %x

diff --git a/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
index 71186682069e2..e6ac85df57b3f 100644
--- a/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
+++ b/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
@@ -8,41 +8,38 @@ module asm "\09.ident\09\22GCC: (GNU) 4.6.3 LLVM: 3.2svn\22"
 
 @b = common global [32000 x float] zeroinitializer, align 16
 
-define i32 @set1ds(i32 %_n, float* nocapture %arr, float %value, i32 %stride) nounwind uwtable {
+define i32 @set1ds(i32 %_n, ptr nocapture %arr, float %value, i32 %stride) nounwind uwtable {
 entry:
   %0 = icmp sgt i32 %_n, 0
   br i1 %0, label %"3.lr.ph", label %"5"
 
 "3.lr.ph":                                        ; preds = %entry
-  %1 = bitcast float* %arr to i8*
-  %2 = sext i32 %stride to i64
+  %1 = sext i32 %stride to i64
   br label %"3"
 
 "3":                                              ; preds = %"3.lr.ph", %"3"
   %indvars.iv = phi i64 [ 0, %"3.lr.ph" ], [ %indvars.iv.next, %"3" ]
-  %3 = shl nsw i64 %indvars.iv, 2
-  %4 = getelementptr inbounds i8, i8* %1, i64 %3
-  %5 = bitcast i8* %4 to float*
-  store float %value, float* %5, align 4
-  %indvars.iv.next = add i64 %indvars.iv, %2
-  %6 = trunc i64 %indvars.iv.next to i32
-  %7 = icmp slt i32 %6, %_n
-  br i1 %7, label %"3", label %"5"
+  %2 = shl nsw i64 %indvars.iv, 2
+  %3 = getelementptr inbounds i8, ptr %arr, i64 %2
+  store float %value, ptr %3, align 4
+  %indvars.iv.next = add i64 %indvars.iv, %1
+  %4 = trunc i64 %indvars.iv.next to i32
+  %5 = icmp slt i32 %4, %_n
+  br i1 %5, label %"3", label %"5"
 
 "5":                                              ; preds = %"3", %entry
   ret i32 0
 }
 
-define i32 @init(i8* nocapture %name) unnamed_addr nounwind uwtable {
+define i32 @init(ptr nocapture %name) unnamed_addr nounwind uwtable {
 entry:
   br label %"3"
 
 "3":                                              ; preds = %"3", %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %"3" ]
   %0 = shl nsw i64 %indvars.iv, 2
-  %1 = getelementptr inbounds i8, i8* bitcast (float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 16000) to i8*), i64 %0
-  %2 = bitcast i8* %1 to float*
-  store float -1.000000e+00, float* %2, align 4
+  %1 = getelementptr inbounds i8, ptr getelementptr inbounds ([32000 x float], ptr @b, i64 0, i64 16000), i64 %0
+  store float -1.000000e+00, ptr %1, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 16000

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll
index a7bf05222bae1..798a9cf2a720c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll
@@ -7,7 +7,7 @@
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-ios5.0.0"
 
-define void @foo(float* noalias nocapture %ptrA, float* noalias nocapture readonly %ptrB, i64 %size) {
+define void @foo(ptr noalias nocapture %ptrA, ptr noalias nocapture readonly %ptrB, i64 %size) {
 ; CHECK-LABEL: @foo(
 ; CHECK: fmul <4 x float>
 ;
@@ -20,12 +20,12 @@ for.cond:                                         ; preds = %for.body, %entry
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 
 for.body:                                         ; preds = %for.cond
-  %arrayidx = getelementptr inbounds float, float* %ptrB, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %ptrA, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %ptrB, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %ptrA, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %mul3 = fmul float %0, %1
-  store float %mul3, float* %arrayidx2, align 4
+  store float %mul3, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   br label %for.cond, !llvm.loop !0
 

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
index 5309f03f34c71..cc6ef0feecffd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
@@ -16,16 +16,15 @@ target triple = "aarch64--linux-gnu"
 ; COST:        LV: Found an estimated cost of 4 for VF 2 For instruction: %var4 = udiv i64 %var2, %var3
 ;
 ;
-define i64 @predicated_udiv_scalarized_operand(i64* %a, i64 %x) optsize {
+define i64 @predicated_udiv_scalarized_operand(ptr %a, i64 %x) optsize {
 ; CHECK-LABEL: @predicated_udiv_scalarized_operand(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[ENTRY]] ], [ [[TMP17:%.*]], [[PRED_UDIV_CONTINUE2]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i64> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
@@ -64,8 +63,8 @@ entry:
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
   %r = phi i64 [ 0, %entry ], [ %var6, %for.inc ]
-  %var0 = getelementptr inbounds i64, i64* %a, i64 %i
-  %var2 = load i64, i64* %var0, align 4
+  %var0 = getelementptr inbounds i64, ptr %a, i64 %i
+  %var2 = load i64, ptr %var0, align 4
   %cond0 = icmp sgt i64 %var2, 0
   br i1 %cond0, label %if.then, label %for.inc
 

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
index b9f97b0b8a5cc..4df0ad9e0eb19 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 
 ; Function Attrs: nounwind
-define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
+define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr %c, i32 %size) {
 ;CHECK-LABEL: array_add
 ;CHECK: load <4 x i32>
 ;CHECK: load <4 x i32>
@@ -22,13 +22,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %size
@@ -38,5 +38,5 @@ for.end.loopexit:                                 ; preds = %for.body
   br label %for.end
 
 for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret i32* %c
+  ret ptr %c
 }

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll
index 1e91674705a79..8e963fc5997b8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 
 ; Function Attrs: nounwind
-define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
+define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr %c, i32 %size) {
 ;CHECK-LABEL: array_add
 ;CHECK: load <4 x i32>
 ;CHECK: load <4 x i32>
@@ -22,13 +22,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %size
@@ -38,5 +38,5 @@ for.end.loopexit:                                 ; preds = %for.body
   br label %for.end
 
 for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret i32* %c
+  ret ptr %c
 }

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
index d7d8bc8f69037..c47a63020281a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
@@ -12,9 +12,9 @@
 ; CHECK-LABEL: test_sge
 ; CHECK-LABEL: vector.scevcheck
 ; CHECK-LABEL: vector.body
-define void @test_sge(i32* noalias %A,
-                      i32* noalias %B,
-                      i32* noalias %C, i32 %N) {
+define void @test_sge(ptr noalias %A,
+                      ptr noalias %B,
+                      ptr noalias %C, i32 %N) {
 entry:
   %cmp13 = icmp eq i32 %N, 0
   br i1 %cmp13, label %for.end, label %for.body.preheader
@@ -27,15 +27,15 @@ for.body:
   %indvars.next = add i16 %indvars.iv, 1
   %indvars.ext = zext i16 %indvars.iv to i32
 
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
-  %1 = load i32, i32* %arrayidx3, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.ext
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.ext
+  %1 = load i32, ptr %arrayidx3, align 4
 
   %mul4 = mul i32 %1, %0
 
-  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
-  store i32 %mul4, i32* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.ext
+  store i32 %mul4, ptr %arrayidx7, align 4
 
   %exitcond = icmp sge i32 %indvars.ext, %N
   br i1 %exitcond, label %for.end.loopexit, label %for.body
@@ -50,9 +50,9 @@ for.end:
 ; CHECK-LABEL: test_uge
 ; CHECK-LABEL: vector.scevcheck
 ; CHECK-LABEL: vector.body
-define void @test_uge(i32* noalias %A,
-                      i32* noalias %B,
-                      i32* noalias %C, i32 %N, i32 %Offset) {
+define void @test_uge(ptr noalias %A,
+                      ptr noalias %B,
+                      ptr noalias %C, i32 %N, i32 %Offset) {
 entry:
   %cmp13 = icmp eq i32 %N, 0
   br i1 %cmp13, label %for.end, label %for.body.preheader
@@ -67,15 +67,15 @@ for.body:
   %indvars.ext = sext i16 %indvars.iv to i32
   %indvars.access = add i32 %Offset, %indvars.ext
 
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access
-  %1 = load i32, i32* %arrayidx3, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.access
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.access
+  %1 = load i32, ptr %arrayidx3, align 4
 
   %mul4 = add i32 %1, %0
 
-  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access
-  store i32 %mul4, i32* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.access
+  store i32 %mul4, ptr %arrayidx7, align 4
 
   %exitcond = icmp uge i32 %indvars.ext, %N
   br i1 %exitcond, label %for.end.loopexit, label %for.body
@@ -90,9 +90,9 @@ for.end:
 ; CHECK-LABEL: test_ule
 ; CHECK-LABEL: vector.scevcheck
 ; CHECK-LABEL: vector.body
-define void @test_ule(i32* noalias %A,
-                      i32* noalias %B,
-                      i32* noalias %C, i32 %N,
+define void @test_ule(ptr noalias %A,
+                      ptr noalias %B,
+                      ptr noalias %C, i32 %N,
                       i16 %M) {
 entry:
   %cmp13 = icmp eq i32 %N, 0
@@ -106,15 +106,15 @@ for.body:
   %indvars.next = sub i16 %indvars.iv, 1
   %indvars.ext = zext i16 %indvars.iv to i32
 
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
-  %1 = load i32, i32* %arrayidx3, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.ext
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.ext
+  %1 = load i32, ptr %arrayidx3, align 4
 
   %mul4 = mul i32 %1, %0
 
-  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
-  store i32 %mul4, i32* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.ext
+  store i32 %mul4, ptr %arrayidx7, align 4
 
   %exitcond = icmp ule i32 %indvars.ext, %N
   br i1 %exitcond, label %for.end.loopexit, label %for.body
@@ -129,9 +129,9 @@ for.end:
 ; CHECK-LABEL: test_sle
 ; CHECK-LABEL: vector.scevcheck
 ; CHECK-LABEL: vector.body
-define void @test_sle(i32* noalias %A,
-                   i32* noalias %B,
-                   i32* noalias %C, i32 %N,
+define void @test_sle(ptr noalias %A,
+                   ptr noalias %B,
+                   ptr noalias %C, i32 %N,
                    i16 %M) {
 entry:
   %cmp13 = icmp eq i32 %N, 0
@@ -145,15 +145,15 @@ for.body:
   %indvars.next = sub i16 %indvars.iv, 1
   %indvars.ext = sext i16 %indvars.iv to i32
 
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
-  %1 = load i32, i32* %arrayidx3, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.ext
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.ext
+  %1 = load i32, ptr %arrayidx3, align 4
 
   %mul4 = mul i32 %1, %0
 
-  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
-  store i32 %mul4, i32* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.ext
+  store i32 %mul4, ptr %arrayidx7, align 4
 
   %exitcond = icmp sle i32 %indvars.ext, %N
   br i1 %exitcond, label %for.end.loopexit, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
index 2236cfe3eb7a1..c0b2073b59ec9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
@@ -14,14 +14,14 @@ target triple = "aarch64"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 
-define void @f1(i32* %A) #0 {
+define void @f1(ptr %A) #0 {
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %iv
-  store i32 1, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %iv
+  store i32 1, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp ne i64 %iv.next, 1024
   br i1 %exitcond, label %for.body, label %exit

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll
index 2a6cb5a93d2cf..113a36140eea9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll
@@ -7,7 +7,7 @@
 ; maximum VF for NEON is calculated by 128/size of smallest type in loop.
 ; And while we don't have an instruction to  load 4 x i8, vectorization
 ; might still be profitable.
-define void @test_load_i8_store_i32(i8* noalias %src, i32* noalias %dst, i32 %off, i64 %N) {
+define void @test_load_i8_store_i32(ptr noalias %src, ptr noalias %dst, i32 %off, i64 %N) {
 ; CHECK-LABEL: @test_load_i8_store_i32(
 ; CHECK:       <16 x i8>
 ;
@@ -16,12 +16,12 @@ entry:
 
 loop:
   %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
-  %gep.src = getelementptr inbounds i8, i8* %src, i64 %iv
-  %lv = load i8, i8* %gep.src, align 1
+  %gep.src = getelementptr inbounds i8, ptr %src, i64 %iv
+  %lv = load i8, ptr %gep.src, align 1
   %lv.ext = zext i8 %lv to i32
   %add = add i32 %lv.ext, %off
-  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 %iv
-  store i32 %add, i32* %gep.dst
+  %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv
+  store i32 %add, ptr %gep.dst
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %exit, label %loop
@@ -31,7 +31,7 @@ exit:
 }
 
 ; Same as test_load_i8_store_i32, but with types flipped for load and store.
-define void @test_load_i32_store_i8(i32* noalias %src, i8* noalias %dst, i32 %off, i64 %N) {
+define void @test_load_i32_store_i8(ptr noalias %src, ptr noalias %dst, i32 %off, i64 %N) {
 ; CHECK-LABEL: @test_load_i32_store_i8(
 ; CHECK:     <16 x i8>
 ;
@@ -40,12 +40,12 @@ entry:
 
 loop:
   %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
-  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
-  %lv = load i32, i32* %gep.src, align 1
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %lv = load i32, ptr %gep.src, align 1
   %add = add i32 %lv, %off
   %add.trunc = trunc i32 %add to i8
-  %gep.dst = getelementptr inbounds i8, i8* %dst, i64 %iv
-  store i8 %add.trunc, i8* %gep.dst
+  %gep.dst = getelementptr inbounds i8, ptr %dst, i64 %iv
+  store i8 %add.trunc, ptr %gep.dst
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %exit, label %loop
@@ -55,7 +55,7 @@ exit:
 }
 
 ; All memory operations use i32, all memory operations are profitable with VF 4.
-define void @test_load_i32_store_i32(i32* noalias %src, i32* noalias %dst, i8 %off, i64 %N) {
+define void @test_load_i32_store_i32(ptr noalias %src, ptr noalias %dst, i8 %off, i64 %N) {
 ; CHECK-LABEL: @test_load_i32_store_i32(
 ; CHECK: vector.body:
 ; CHECK:   <4 x i32>
@@ -65,13 +65,13 @@ entry:
 
 loop:
   %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
-  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
-  %lv = load i32, i32* %gep.src, align 1
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %lv = load i32, ptr %gep.src, align 1
   %lv.trunc = trunc i32 %lv to i8
   %add = add i8 %lv.trunc, %off
   %add.ext = zext i8 %add to i32
-  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 %iv
-  store i32 %add.ext, i32* %gep.dst
+  %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv
+  store i32 %add.ext, ptr %gep.dst
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %exit, label %loop
@@ -83,7 +83,7 @@ exit:
 ; Test with loop body that requires a large number of vector registers if the
 ; vectorization factor is large. Make sure the register estimates limit the
 ; vectorization factor.
-define void @test_load_i8_store_i64_large(i8* noalias %src, i64* noalias %dst, i64* noalias %dst.2, i64* noalias %dst.3, i64* noalias %dst.4, i64* noalias %dst.5, i64%off, i64 %off.2, i64 %N) {
+define void @test_load_i8_store_i64_large(ptr noalias %src, ptr noalias %dst, ptr noalias %dst.2, ptr noalias %dst.3, ptr noalias %dst.4, ptr noalias %dst.5, i64%off, i64 %off.2, i64 %N) {
 ; CHECK-LABEL: @test_load_i8_store_i64_large
 ; CHECK: <8 x i64>
 ;
@@ -92,28 +92,28 @@ entry:
 
 loop:
   %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
-  %gep.src = getelementptr inbounds i8, i8* %src, i64 %iv
-  %gep.dst.3 = getelementptr inbounds i64, i64* %dst.3, i64 %iv
-  %lv.dst.3 = load i64, i64* %gep.dst.3, align 1
-  %gep.dst.5 = getelementptr inbounds i64, i64* %dst.5, i64 %iv
-  %lv.dst.5 = load i64, i64* %gep.dst.3, align 1
+  %gep.src = getelementptr inbounds i8, ptr %src, i64 %iv
+  %gep.dst.3 = getelementptr inbounds i64, ptr %dst.3, i64 %iv
+  %lv.dst.3 = load i64, ptr %gep.dst.3, align 1
+  %gep.dst.5 = getelementptr inbounds i64, ptr %dst.5, i64 %iv
+  %lv.dst.5 = load i64, ptr %gep.dst.3, align 1
 
-  %lv = load i8, i8* %gep.src, align 1
+  %lv = load i8, ptr %gep.src, align 1
   %lv.ext = zext i8 %lv to i64
   %add = add i64 %lv.ext, %off
   %add.2 = add i64 %add, %off.2
-  %gep.dst = getelementptr inbounds i64, i64* %dst, i64 %iv
-  %gep.dst.2 = getelementptr inbounds i64, i64* %dst.2, i64 %iv
+  %gep.dst = getelementptr inbounds i64, ptr %dst, i64 %iv
+  %gep.dst.2 = getelementptr inbounds i64, ptr %dst.2, i64 %iv
 
   %add.3 = add i64 %add.2, %lv.dst.3
   %add.4 = add i64 %add.3, %add
-  %gep.dst.4 = getelementptr inbounds i64, i64* %dst.4, i64 %iv
+  %gep.dst.4 = getelementptr inbounds i64, ptr %dst.4, i64 %iv
   %add.5 = add i64 %add.2, %lv.dst.5
-  store i64 %add.2, i64* %gep.dst.2
-  store i64 %add, i64* %gep.dst
-  store i64 %add.3, i64* %gep.dst.3
-  store i64 %add.4, i64* %gep.dst.4
-  store i64 %add.5, i64* %gep.dst.5
+  store i64 %add.2, ptr %gep.dst.2
+  store i64 %add, ptr %gep.dst
+  store i64 %add.3, ptr %gep.dst.3
+  store i64 %add.4, ptr %gep.dst.4
+  store i64 %add.5, ptr %gep.dst.5
 
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll
index 4671c6fa7747e..8079c1e11ef54 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll
@@ -30,32 +30,32 @@ for.body:
   %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ]
   %add = add i64 %v.055, %offset
   %mul = mul i64 %add, 3
-  %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i64 0, i64 %v.055
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %mul
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 %v.055
+  %1 = load float, ptr %arrayidx2, align 4
   %mul3 = fmul fast float %0, %1
-  %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i64 0, i64 %v.055
-  %2 = load float, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 %v.055
+  %2 = load float, ptr %arrayidx4, align 4
   %mul5 = fmul fast float %mul3, %2
-  %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i64 0, i64 %v.055
-  %3 = load float, float* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 %v.055
+  %3 = load float, ptr %arrayidx6, align 4
   %mul7 = fmul fast float %mul5, %3
-  %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i64 0, i64 %v.055
-  %4 = load float, float* %arrayidx8, align 4
+  %arrayidx8 = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 %v.055
+  %4 = load float, ptr %arrayidx8, align 4
   %mul9 = fmul fast float %mul7, %4
   %add10 = fadd fast float %r.057, %mul9
   %arrayidx.sum = add i64 %mul, 1
-  %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum
-  %5 = load float, float* %arrayidx11, align 4
+  %arrayidx11 = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %arrayidx.sum
+  %5 = load float, ptr %arrayidx11, align 4
   %mul13 = fmul fast float %1, %5
   %mul15 = fmul fast float %2, %mul13
   %mul17 = fmul fast float %3, %mul15
   %mul19 = fmul fast float %4, %mul17
   %add20 = fadd fast float %g.056, %mul19
   %arrayidx.sum52 = add i64 %mul, 2
-  %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52
-  %6 = load float, float* %arrayidx21, align 4
+  %arrayidx21 = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %arrayidx.sum52
+  %6 = load float, ptr %arrayidx21, align 4
   %mul23 = fmul fast float %1, %6
   %mul25 = fmul fast float %2, %mul23
   %mul27 = fmul fast float %3, %mul25
@@ -78,8 +78,8 @@ for.end:
   %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
   %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ]
   %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ]
-  store i8 %r.0.lcssa, i8* @r_, align 1
-  store i8 %g.0.lcssa, i8* @g_, align 1
-  store i8 %b.0.lcssa, i8* @b_, align 1
+  store i8 %r.0.lcssa, ptr @r_, align 1
+  store i8 %g.0.lcssa, ptr @g_, align 1
+  store i8 %b.0.lcssa, ptr @b_, align 1
   ret void
 }

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll
index 99a59095957e1..fe9631a8630f2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll
@@ -11,22 +11,22 @@ target triple = "aarch64--linux-gnu"
 ; CHECK-NEXT:    [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 5
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDUCTION]] to i32
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDUCTION1]] to i32
-; CHECK-NEXT:    [[GEP0:%.+]] = getelementptr inbounds i32, i32* %dst, i32 [[TMP4]]
-; CHECK-NEXT:    [[GEP1:%.+]] = getelementptr inbounds i32, i32* %dst, i32 [[TMP5]]
-; CHECK-NEXT:    store i32 0, i32* [[GEP0]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[GEP1]], align 4
+; CHECK-NEXT:    [[GEP0:%.+]] = getelementptr inbounds i32, ptr %dst, i32 [[TMP4]]
+; CHECK-NEXT:    [[GEP1:%.+]] = getelementptr inbounds i32, ptr %dst, i32 [[TMP5]]
+; CHECK-NEXT:    store i32 0, ptr [[GEP0]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[GEP1]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
 ;
-define void @non_primary_iv_trunc_free(i64 %n, i32* %dst) {
+define void @non_primary_iv_trunc_free(i64 %n, ptr %dst) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
   %tmp0 = trunc i64 %i to i32
-  %gep.dst = getelementptr inbounds i32, i32* %dst, i32 %tmp0
-  store i32 0, i32* %gep.dst
+  %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %tmp0
+  store i32 0, ptr %gep.dst
   %i.next = add nuw nsw i64 %i, 5
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
index 5646ce65d6c51..78781215bdd96 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
@@ -2,26 +2,25 @@
 
 ; In the loop below, both the current and previous values of a first-order
 ; recurrence are stored in an interleave group.
-define void @interleaved_store_first_order_recurrence(i32* noalias %src, i32* %dst) {
+define void @interleaved_store_first_order_recurrence(ptr noalias %src, ptr %dst) {
 ; CHECK-LABEL: @interleaved_store_first_order_recurrence(
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
 ; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 99>, %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[SRC:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 2
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 -2
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <12 x i32>*
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 2
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -2
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> [[TMP10]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i32> [[TMP11]], <12 x i32> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; CHECK-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[TMP8]], align 4
+; CHECK-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP12]], label %middle.block, label %vector.body
@@ -32,14 +31,14 @@ entry:
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
   %for = phi i32 [ 99, %entry ],[ %for.next, %loop ]
-  %for.next = load i32, i32* %src, align 4
+  %for.next = load i32, ptr %src, align 4
   %off = mul nuw nsw i64 %iv, 3
-  %gep.1 = getelementptr inbounds i32, i32* %dst, i64 %off
-  store i32 0, i32* %gep.1, align 4
-  %gep.2 = getelementptr inbounds i32, i32* %gep.1, i64 1
-  store i32 %for, i32* %gep.2, align 4
-  %gep.3 = getelementptr inbounds i32, i32* %gep.1, i64 2
-  store i32 %for.next, i32* %gep.3, align 4
+  %gep.1 = getelementptr inbounds i32, ptr %dst, i64 %off
+  store i32 0, ptr %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, ptr %gep.1, i64 1
+  store i32 %for, ptr %gep.2, align 4
+  %gep.3 = getelementptr inbounds i32, ptr %gep.1, i64 2
+  store i32 %for.next, ptr %gep.3, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %ec = icmp eq i64 %iv.next, 1000
   br i1 %ec, label %exit, label %loop

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll
index 4add3f67eed7f..0184daf5b6fe6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll
@@ -17,19 +17,19 @@ target triple = "aarch64--linux-gnu"
 ; CHECK: insertelement <2 x i8> [[INSERT]], i8 [[LOAD2]], i32 1
 ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
 
-define void @test(%pair* %p, i8* %q, i64 %n) {
+define void @test(ptr %p, ptr %q, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr %pair, %pair* %p, i64 %i, i32 0
-  %tmp1 = load i8, i8* %tmp0, align 1
-  %tmp2 = getelementptr %pair, %pair* %p, i64 %i, i32 1
-  %tmp3 = load i8, i8* %tmp2, align 1
+  %tmp0 = getelementptr %pair, ptr %p, i64 %i, i32 0
+  %tmp1 = load i8, ptr %tmp0, align 1
+  %tmp2 = getelementptr %pair, ptr %p, i64 %i, i32 1
+  %tmp3 = load i8, ptr %tmp2, align 1
   %add = add i8 %tmp1, %tmp3
-  %qi = getelementptr i8, i8* %q, i64 %i
-  store i8 %add, i8* %qi, align 1
+  %qi = getelementptr i8, ptr %q, i64 %i
+  store i8 %add, ptr %qi, align 1
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp eq i64 %i.next, %n
   br i1 %cond, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
index 0d145ef32938d..c61ddbcb3408a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
@@ -8,28 +8,28 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-gnueabi"
 
 %i8.2 = type {i8, i8}
-define void @i8_factor_2(%i8.2* %data, i64 %n) {
+define void @i8_factor_2(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
 ; VF_8-LABEL:  Checking a loop in 'i8_factor_2'
-; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1
 ; VF_16-LABEL: Checking a loop in 'i8_factor_2'
-; VF_16:         Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 2 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_16:         Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 2 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 1
-  %tmp2 = load i8, i8* %tmp0, align 1
-  %tmp3 = load i8, i8* %tmp1, align 1
-  store i8 0, i8* %tmp0, align 1
-  store i8 0, i8* %tmp1, align 1
+  %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load i8, ptr %tmp0, align 1
+  %tmp3 = load i8, ptr %tmp1, align 1
+  store i8 0, ptr %tmp0, align 1
+  store i8 0, ptr %tmp1, align 1
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -39,33 +39,33 @@ for.end:
 }
 
 %i16.2 = type {i16, i16}
-define void @i16_factor_2(%i16.2* %data, i64 %n) {
+define void @i16_factor_2(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
 ; VF_4-LABEL: Checking a loop in 'i16_factor_2'
-; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2
 ; VF_8-LABEL:  Checking a loop in 'i16_factor_2'
-; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2
 ; VF_16-LABEL: Checking a loop in 'i16_factor_2'
-; VF_16:         Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_16:         Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 1
-  %tmp2 = load i16, i16* %tmp0, align 2
-  %tmp3 = load i16, i16* %tmp1, align 2
-  store i16 0, i16* %tmp0, align 2
-  store i16 0, i16* %tmp1, align 2
+  %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load i16, ptr %tmp0, align 2
+  %tmp3 = load i16, ptr %tmp1, align 2
+  store i16 0, ptr %tmp0, align 2
+  store i16 0, ptr %tmp1, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -75,38 +75,38 @@ for.end:
 }
 
 %i32.2 = type {i32, i32}
-define void @i32_factor_2(%i32.2* %data, i64 %n) {
+define void @i32_factor_2(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i32_factor_2'
-; VF_2:          Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_2:          Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4
 ; VF_4-LABEL:  Checking a loop in 'i32_factor_2'
-; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4
 ; VF_8-LABEL:  Checking a loop in 'i32_factor_2'
-; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4
 ; VF_16-LABEL: Checking a loop in 'i32_factor_2'
-; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 1
-  %tmp2 = load i32, i32* %tmp0, align 4
-  %tmp3 = load i32, i32* %tmp1, align 4
-  store i32 0, i32* %tmp0, align 4
-  store i32 0, i32* %tmp1, align 4
+  %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load i32, ptr %tmp0, align 4
+  %tmp3 = load i32, ptr %tmp1, align 4
+  store i32 0, ptr %tmp0, align 4
+  store i32 0, ptr %tmp1, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -116,38 +116,38 @@ for.end:
 }
 
 %i64.2 = type {i64, i64}
-define void @i64_factor_2(%i64.2* %data, i64 %n) {
+define void @i64_factor_2(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i64_factor_2'
-; VF_2:          Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_2:          Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8
 ; VF_4-LABEL:  Checking a loop in 'i64_factor_2'
-; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i64 0, ptr %tmp1, align 8
 ; VF_8-LABEL:  Checking a loop in 'i64_factor_2'
-; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store i64 0, ptr %tmp1, align 8
 ; VF_16-LABEL: Checking a loop in 'i64_factor_2'
-; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store i64 0, ptr %tmp1, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 1
-  %tmp2 = load i64, i64* %tmp0, align 8
-  %tmp3 = load i64, i64* %tmp1, align 8
-  store i64 0, i64* %tmp0, align 8
-  store i64 0, i64* %tmp1, align 8
+  %tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load i64, ptr %tmp0, align 8
+  %tmp3 = load i64, ptr %tmp1, align 8
+  store i64 0, ptr %tmp0, align 8
+  store i64 0, ptr %tmp1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -157,7 +157,7 @@ for.end:
 }
 
 %i64.8 = type {i64, i64, i64, i64, i64, i64, i64, i64}
-define void @i64_factor_8(%i64.8* %data, i64 %n) {
+define void @i64_factor_8(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
@@ -168,18 +168,18 @@ entry:
 ; gaps.
 ;
 ; VF_2-LABEL: Checking a loop in 'i64_factor_8'
-; VF_2:         Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
-; VF_2-NEXT:    Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
-; VF_2-NEXT:    Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_2-NEXT:    Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_2:         Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_2-NEXT:    Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_2-NEXT:    Found an estimated cost of 7 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_2-NEXT:    Found an estimated cost of 7 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i64.8, %i64.8* %data, i64 %i, i32 2
-  %tmp1 = getelementptr inbounds %i64.8, %i64.8* %data, i64 %i, i32 6
-  %tmp2 = load i64, i64* %tmp0, align 8
-  %tmp3 = load i64, i64* %tmp1, align 8
-  store i64 0, i64* %tmp0, align 8
-  store i64 0, i64* %tmp1, align 8
+  %tmp0 = getelementptr inbounds %i64.8, ptr %data, i64 %i, i32 2
+  %tmp1 = getelementptr inbounds %i64.8, ptr %data, i64 %i, i32 6
+  %tmp2 = load i64, ptr %tmp0, align 8
+  %tmp3 = load i64, ptr %tmp1, align 8
+  store i64 0, ptr %tmp0, align 8
+  store i64 0, ptr %tmp1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
index 63d83a289bfdb..e35bc9734bd09 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -4,11 +4,11 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64"
 
 ; CHECK-LABEL: @add_a(
-; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: load <16 x i8>, ptr
 ; CHECK: add <16 x i8>
 ; CHECK: store <16 x i8>
 ; Function Attrs: nounwind
-define void @add_a(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
+define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
 entry:
   %cmp8 = icmp sgt i32 %len, 0
   br i1 %cmp8, label %for.body, label %for.cond.cleanup
@@ -18,13 +18,13 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx
+  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx
   %conv = zext i8 %0 to i32
   %add = add nuw nsw i32 %conv, 2
   %conv1 = trunc i32 %add to i8
-  %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
-  store i8 %conv1, i8* %arrayidx3
+  %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
+  store i8 %conv1, ptr %arrayidx3
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %len
@@ -34,11 +34,11 @@ for.body:                                         ; preds = %entry, %for.body
 ; Ensure that we preserve nuw/nsw if we're not shrinking the values we're
 ; working with.
 ; CHECK-LABEL: @add_a1(
-; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: load <16 x i8>, ptr
 ; CHECK: add nuw nsw <16 x i8>
 ; CHECK: store <16 x i8>
 ; Function Attrs: nounwind
-define void @add_a1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
+define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
 entry:
   %cmp8 = icmp sgt i32 %len, 0
   br i1 %cmp8, label %for.body, label %for.cond.cleanup
@@ -48,11 +48,11 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx
+  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx
   %add = add nuw nsw i8 %0, 2
-  %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
-  store i8 %add, i8* %arrayidx3
+  %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
+  store i8 %add, ptr %arrayidx3
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %len
@@ -60,11 +60,11 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: @add_b(
-; CHECK: load <8 x i16>, <8 x i16>*
+; CHECK: load <8 x i16>, ptr
 ; CHECK: add <8 x i16>
 ; CHECK: store <8 x i16>
 ; Function Attrs: nounwind
-define void @add_b(i16* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
+define void @add_b(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
 entry:
   %cmp9 = icmp sgt i32 %len, 0
   br i1 %cmp9, label %for.body, label %for.cond.cleanup
@@ -74,13 +74,13 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
-  %0 = load i16, i16* %arrayidx
+  %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv
+  %0 = load i16, ptr %arrayidx
   %conv8 = zext i16 %0 to i32
   %add = add nuw nsw i32 %conv8, 2
   %conv1 = trunc i32 %add to i16
-  %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
-  store i16 %conv1, i16* %arrayidx3
+  %arrayidx3 = getelementptr inbounds i16, ptr %q, i64 %indvars.iv
+  store i16 %conv1, ptr %arrayidx3
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %len
@@ -88,11 +88,11 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: @add_c(
-; CHECK: load <8 x i8>, <8 x i8>*
+; CHECK: load <8 x i8>, ptr
 ; CHECK: add <8 x i16>
 ; CHECK: store <8 x i16>
 ; Function Attrs: nounwind
-define void @add_c(i8* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
+define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
 entry:
   %cmp8 = icmp sgt i32 %len, 0
   br i1 %cmp8, label %for.body, label %for.cond.cleanup
@@ -102,13 +102,13 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx
+  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx
   %conv = zext i8 %0 to i32
   %add = add nuw nsw i32 %conv, 2
   %conv1 = trunc i32 %add to i16
-  %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
-  store i16 %conv1, i16* %arrayidx3
+  %arrayidx3 = getelementptr inbounds i16, ptr %q, i64 %indvars.iv
+  store i16 %conv1, ptr %arrayidx3
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %len
@@ -119,7 +119,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: load <8 x i16>
 ; CHECK: add nsw <8 x i32>
 ; CHECK: store <8 x i32>
-define void @add_d(i16* noalias nocapture readonly %p, i32* noalias nocapture %q, i32 %len) #0 {
+define void @add_d(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
 entry:
   %cmp7 = icmp sgt i32 %len, 0
   br i1 %cmp7, label %for.body, label %for.cond.cleanup
@@ -129,12 +129,12 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
-  %0 = load i16, i16* %arrayidx
+  %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv
+  %0 = load i16, ptr %arrayidx
   %conv = sext i16 %0 to i32
   %add = add nsw i32 %conv, 2
-  %arrayidx2 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
-  store i32 %add, i32* %arrayidx2
+  %arrayidx2 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %len
@@ -151,7 +151,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: xor <16 x i8>
 ; CHECK: mul <16 x i8>
 ; CHECK: store <16 x i8>
-define void @add_e(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
+define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
 entry:
   %cmp.32 = icmp sgt i32 %len, 0
   br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup
@@ -166,8 +166,8 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx
+  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx
   %conv = zext i8 %0 to i32
   %add = shl i32 %conv, 4
   %conv2 = add nuw nsw i32 %add, 32
@@ -178,8 +178,8 @@ for.body:                                         ; preds = %for.body, %for.body
   %conv17 = xor i32 %mul.masked, %conv11
   %mul18 = mul nuw nsw i32 %conv17, %and
   %conv19 = trunc i32 %mul18 to i8
-  %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
-  store i8 %conv19, i8* %arrayidx21
+  %arrayidx21 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
+  store i8 %conv19, ptr %arrayidx21
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %len
@@ -197,7 +197,7 @@ for.body:                                         ; preds = %for.body, %for.body
 ; CHECK: xor <8 x i8>
 ; CHECK: mul <8 x i8>
 ; CHECK: store <8 x i8>
-define void @add_f(i16* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
+define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
 entry:
   %cmp.32 = icmp sgt i32 %len, 0
   br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup
@@ -212,8 +212,8 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
-  %0 = load i16, i16* %arrayidx
+  %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv
+  %0 = load i16, ptr %arrayidx
   %conv = sext i16 %0 to i32
   %add = shl i32 %conv, 4
   %conv2 = add nsw i32 %add, 32
@@ -225,8 +225,8 @@ for.body:                                         ; preds = %for.body, %for.body
   %conv17 = xor i32 %mul.masked, %conv11
   %mul18 = mul nuw nsw i32 %conv17, %and
   %conv19 = trunc i32 %mul18 to i8
-  %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
-  store i8 %conv19, i8* %arrayidx21
+  %arrayidx21 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
+  store i8 %conv19, ptr %arrayidx21
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %len
@@ -234,11 +234,11 @@ for.body:                                         ; preds = %for.body, %for.body
 }
 
 ; CHECK-LABEL: @add_phifail(
-; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: load <16 x i8>, ptr
 ; CHECK: add nuw nsw <16 x i32>
 ; CHECK: store <16 x i8>
 ; Function Attrs: nounwind
-define void @add_phifail(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
+define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
 entry:
   %cmp8 = icmp sgt i32 %len, 0
   br i1 %cmp8, label %for.body, label %for.cond.cleanup
@@ -249,13 +249,13 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx
+  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx
   %conv = zext i8 %0 to i32
   %add = add nuw nsw i32 %conv, 2
   %conv1 = trunc i32 %add to i8
-  %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
-  store i8 %conv1, i8* %arrayidx3
+  %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
+  store i8 %conv1, ptr %arrayidx3
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %len
@@ -267,10 +267,10 @@ for.body:                                         ; preds = %entry, %for.body
 ; even when %len exactly divides VF (since we extract from the second last index
 ; and pass this to the for.cond.cleanup block). Vectorized loop returns
 ; the correct value a_phi = p[len -2]
-define i8 @add_phifail2(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
+define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
 ; CHECK-LABEL: @add_phifail2(
 ; CHECK: vector.body:
-; CHECK:   %wide.load = load <16 x i8>, <16 x i8>*
+; CHECK:   %wide.load = load <16 x i8>, ptr
 ; CHECK:   %[[L1:.+]] = zext <16 x i8> %wide.load to <16 x i32>
 ; CHECK:   add nuw nsw <16 x i32>
 ; CHECK:   store <16 x i8>
@@ -293,13 +293,13 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx
+  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx
   %conv = zext i8 %0 to i32
   %add = add nuw nsw i32 %conv, 2
   %conv1 = trunc i32 %add to i8
-  %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
-  store i8 %conv1, i8* %arrayidx3
+  %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
+  store i8 %conv1, ptr %arrayidx3
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %len

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll
index b1f0b27537ba7..263da076fbb4e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll
@@ -10,22 +10,21 @@ source_filename = "bugpoint-output-26dbd81.bc"
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"
 
-%struct.CvNode1D = type { double, %struct.CvNode1D* }
+%struct.CvNode1D = type { double, ptr }
 
 ; CHECK-LABEL: @cvCalcEMD2
 ; CHECK: vector.body
-; CHECK: store <{{[0-9]+}} x %struct.CvNode1D*>
-define void @cvCalcEMD2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK: store <{{[0-9]+}} x ptr>
+define void @cvCalcEMD2() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 {
 entry:
   br label %for.body14.i.i
 
 for.body14.i.i:                                   ; preds = %for.body14.i.i, %entry
   %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ]
-  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i
-  %val.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* %arrayidx15.i.i1427, i32 0, i32 0
-  store double 0xC415AF1D80000000, double* %val.i.i, align 4
-  %next19.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i, i32 1
-  store %struct.CvNode1D* undef, %struct.CvNode1D** %next19.i.i, align 4
+  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i
+  store double 0xC415AF1D80000000, ptr %arrayidx15.i.i1427, align 4
+  %next19.i.i = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i, i32 1
+  store ptr undef, ptr %next19.i.i, align 4
   %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
   %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
   br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i
@@ -36,22 +35,22 @@ for.end22.i.i:                                    ; preds = %for.body14.i.i
 
 ; This test checks when a pointer value is stored into a double type.
 
-%struct.CvNode1D2 = type { %struct.CvNode1D2*, double }
+%struct.CvNode1D2 = type { ptr, double }
 
 ; CHECK-LABEL: @cvCalcEMD2_2
 ; CHECK: vector.body
 ; CHECK: store <{{[0-9]+}} x double>
-define void @cvCalcEMD2_2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define void @cvCalcEMD2_2() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 {
 entry:
   br label %for.body14.i.i
 
 for.body14.i.i:                                   ; preds = %for.body14.i.i, %entry
   %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ]
-  %next19.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i, i32 0
-  store %struct.CvNode1D2* undef, %struct.CvNode1D2** %next19.i.i, align 4
-  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i
-  %val.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* %arrayidx15.i.i1427, i32 0, i32 1
-  store double 0xC415AF1D80000000, double* %val.i.i, align 4
+  %next19.i.i = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i, i32 0
+  store ptr undef, ptr %next19.i.i, align 4
+  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i
+  %val.i.i = getelementptr inbounds %struct.CvNode1D2, ptr %arrayidx15.i.i1427, i32 0, i32 1
+  store double 0xC415AF1D80000000, ptr %val.i.i, align 4
   %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
   %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
   br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i
@@ -65,17 +64,16 @@ for.end22.i.i:                                    ; preds = %for.body14.i.i
 ; CHECK-LABEL: @cvCalcEMD3
 ; CHECK: vector.body
 ; CHECK: inttoptr <{{[0-9]+}} x i64>
-define void @cvCalcEMD3() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define void @cvCalcEMD3() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 {
 entry:
   br label %for.body14.i.i
 
 for.body14.i.i:                                   ; preds = %for.body14.i.i, %entry
   %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ]
-  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i
-  %val.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* %arrayidx15.i.i1427, i32 0, i32 0
-  %load_d = load double, double* %val.i.i, align 4
-  %next19.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i, i32 1
-  %load_p = load %struct.CvNode1D*, %struct.CvNode1D** %next19.i.i, align 4
+  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i
+  %load_d = load double, ptr %arrayidx15.i.i1427, align 4
+  %next19.i.i = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i, i32 1
+  %load_p = load ptr, ptr %next19.i.i, align 4
   %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
   %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
   br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i
@@ -88,18 +86,18 @@ for.end22.i.i:                                    ; preds = %for.body14.i.i
 
 ; CHECK-LABEL: @cvCalcEMD3_2
 ; CHECK: vector.body
-; CHECK: ptrtoint <{{[0-9]+}} x %struct.CvNode1D2*>
-define void @cvCalcEMD3_2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK: ptrtoint <{{[0-9]+}} x ptr>
+define void @cvCalcEMD3_2() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 {
 entry:
   br label %for.body14.i.i
 
 for.body14.i.i:                                   ; preds = %for.body14.i.i, %entry
   %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ]
-  %next19.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i, i32 0
-  %load_p = load %struct.CvNode1D2*, %struct.CvNode1D2** %next19.i.i, align 4
-  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i
-  %val.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* %arrayidx15.i.i1427, i32 0, i32 1
-  %load_d = load double, double* %val.i.i, align 4
+  %next19.i.i = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i, i32 0
+  %load_p = load ptr, ptr %next19.i.i, align 4
+  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i
+  %val.i.i = getelementptr inbounds %struct.CvNode1D2, ptr %arrayidx15.i.i1427, i32 0, i32 1
+  %load_d = load double, ptr %val.i.i, align 4
   %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
   %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
   br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index eb967689a0b8e..d1e9f2567faeb 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -7,7 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
 
 ; A call whose argument must be widened. We check that tail folding uses the
 ; primary mask, and that without tail folding we synthesize an all-true mask.
-define void @test_widen(i64* noalias %a, i64* readnone %b) #4 {
+define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen(
 ; TFNONE-NEXT:  entry:
 ; TFNONE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -15,18 +15,16 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 {
 ; TFNONE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TFNONE:       vector.body:
 ; TFNONE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFNONE-NEXT:    [[TMP0:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>*
-; TFNONE-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
+; TFNONE-NEXT:    [[TMP0:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4
 ; TFNONE-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
 ; TFNONE-NEXT:    [[TMP3:%.*]] = call i64 @foo(i64 [[TMP2]]) #[[ATTR2:[0-9]+]]
 ; TFNONE-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
 ; TFNONE-NEXT:    [[TMP5:%.*]] = call i64 @foo(i64 [[TMP4]]) #[[ATTR2]]
 ; TFNONE-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
 ; TFNONE-NEXT:    [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[TMP5]], i32 1
-; TFNONE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT:    [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <2 x i64>*
-; TFNONE-NEXT:    store <2 x i64> [[TMP7]], <2 x i64>* [[TMP9]], align 4
+; TFNONE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT:    store <2 x i64> [[TMP7]], ptr [[TMP8]], align 4
 ; TFNONE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; TFNONE-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; TFNONE-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -37,11 +35,11 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 {
 ; TFNONE-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFNONE:       for.body:
 ; TFNONE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFNONE-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT:    [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFNONE-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
 ; TFNONE-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR2]]
-; TFNONE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT:    store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFNONE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
 ; TFNONE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFNONE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFNONE-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -53,11 +51,11 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 {
 ; TFALWAYS-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFALWAYS:       for.body:
 ; TFALWAYS-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFALWAYS-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT:    [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFALWAYS-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
 ; TFALWAYS-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]]
-; TFALWAYS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT:    store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFALWAYS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
 ; TFALWAYS-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFALWAYS-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFALWAYS-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -71,18 +69,16 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 {
 ; TFFALLBACK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TFFALLBACK:       vector.body:
 ; TFFALLBACK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFFALLBACK-NEXT:    [[TMP0:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFFALLBACK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>*
-; TFFALLBACK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
+; TFFALLBACK-NEXT:    [[TMP0:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFFALLBACK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4
 ; TFFALLBACK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
 ; TFFALLBACK-NEXT:    [[TMP3:%.*]] = call i64 @foo(i64 [[TMP2]]) #[[ATTR2:[0-9]+]]
 ; TFFALLBACK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
 ; TFFALLBACK-NEXT:    [[TMP5:%.*]] = call i64 @foo(i64 [[TMP4]]) #[[ATTR2]]
 ; TFFALLBACK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
 ; TFFALLBACK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[TMP5]], i32 1
-; TFFALLBACK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFFALLBACK-NEXT:    [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <2 x i64>*
-; TFFALLBACK-NEXT:    store <2 x i64> [[TMP7]], <2 x i64>* [[TMP9]], align 4
+; TFFALLBACK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFFALLBACK-NEXT:    store <2 x i64> [[TMP7]], ptr [[TMP8]], align 4
 ; TFFALLBACK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; TFFALLBACK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; TFFALLBACK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -93,11 +89,11 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 {
 ; TFFALLBACK-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFFALLBACK:       for.body:
 ; TFFALLBACK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFFALLBACK-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT:    [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFFALLBACK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
 ; TFFALLBACK-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR2]]
-; TFFALLBACK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT:    store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFFALLBACK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
 ; TFFALLBACK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFFALLBACK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFFALLBACK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -109,11 +105,11 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %gep = getelementptr i64, i64* %b, i64 %indvars.iv
-  %load = load i64, i64* %gep
+  %gep = getelementptr i64, ptr %b, i64 %indvars.iv
+  %load = load i64, ptr %gep
   %call = call i64 @foo(i64 %load) #1
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
-  store i64 %call, i64* %arrayidx
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  store i64 %call, ptr %arrayidx
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1024
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -123,14 +119,14 @@ for.cond.cleanup:
 }
 
 ; Check that a simple conditional call can be vectorized.
-define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 {
+define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_if_then(
 ; TFNONE-NEXT:  entry:
 ; TFNONE-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFNONE:       for.body:
 ; TFNONE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; TFNONE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT:    [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; TFNONE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
 ; TFNONE-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50
 ; TFNONE-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]]
 ; TFNONE:       if.then:
@@ -138,8 +134,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 {
 ; TFNONE-NEXT:    br label [[IF_END]]
 ; TFNONE:       if.end:
 ; TFNONE-NEXT:    [[TMP2:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ]
-; TFNONE-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT:    store i64 [[TMP2]], i64* [[ARRAYIDX1]], align 8
+; TFNONE-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT:    store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8
 ; TFNONE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFNONE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFNONE-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -151,8 +147,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 {
 ; TFALWAYS-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFALWAYS:       for.body:
 ; TFALWAYS-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; TFALWAYS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT:    [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; TFALWAYS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
 ; TFALWAYS-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50
 ; TFALWAYS-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]]
 ; TFALWAYS:       if.then:
@@ -160,8 +156,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 {
 ; TFALWAYS-NEXT:    br label [[IF_END]]
 ; TFALWAYS:       if.end:
 ; TFALWAYS-NEXT:    [[TMP2:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ]
-; TFALWAYS-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT:    store i64 [[TMP2]], i64* [[ARRAYIDX1]], align 8
+; TFALWAYS-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT:    store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8
 ; TFALWAYS-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFALWAYS-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFALWAYS-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -173,8 +169,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 {
 ; TFFALLBACK-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFFALLBACK:       for.body:
 ; TFFALLBACK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; TFFALLBACK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; TFFALLBACK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
 ; TFFALLBACK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50
 ; TFFALLBACK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]]
 ; TFFALLBACK:       if.then:
@@ -182,8 +178,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 {
 ; TFFALLBACK-NEXT:    br label [[IF_END]]
 ; TFFALLBACK:       if.end:
 ; TFFALLBACK-NEXT:    [[TMP2:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ]
-; TFFALLBACK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT:    store i64 [[TMP2]], i64* [[ARRAYIDX1]], align 8
+; TFFALLBACK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT:    store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8
 ; TFFALLBACK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFFALLBACK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFFALLBACK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -195,8 +191,8 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  %0 = load i64, ptr %arrayidx, align 8
   %cmp = icmp ugt i64 %0, 50
   br i1 %cmp, label %if.then, label %if.end
 
@@ -206,8 +202,8 @@ if.then:
 
 if.end:
   %2 = phi i64 [%1, %if.then], [0, %for.body]
-  %arrayidx1 = getelementptr inbounds i64, i64* %b, i64 %indvars.iv
-  store i64 %2, i64* %arrayidx1, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %indvars.iv
+  store i64 %2, ptr %arrayidx1, align 8
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1024
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -220,14 +216,14 @@ for.cond.cleanup:
 ; calls inside the conditional blocks. Although one of the calls has a
 ; uniform parameter and the metadata lists a uniform variant, right now
 ; we just see a splat of the parameter instead. More work needed.
-define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 {
+define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen_if_then_else(
 ; TFNONE-NEXT:  entry:
 ; TFNONE-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFNONE:       for.body:
 ; TFNONE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; TFNONE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT:    [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; TFNONE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
 ; TFNONE-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50
 ; TFNONE-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; TFNONE:       if.then:
@@ -238,8 +234,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 {
 ; TFNONE-NEXT:    br label [[IF_END]]
 ; TFNONE:       if.end:
 ; TFNONE-NEXT:    [[TMP3:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ]
-; TFNONE-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT:    store i64 [[TMP3]], i64* [[ARRAYIDX1]], align 8
+; TFNONE-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT:    store i64 [[TMP3]], ptr [[ARRAYIDX1]], align 8
 ; TFNONE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFNONE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFNONE-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -251,8 +247,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 {
 ; TFALWAYS-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFALWAYS:       for.body:
 ; TFALWAYS-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; TFALWAYS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT:    [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; TFALWAYS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
 ; TFALWAYS-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50
 ; TFALWAYS-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; TFALWAYS:       if.then:
@@ -263,8 +259,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 {
 ; TFALWAYS-NEXT:    br label [[IF_END]]
 ; TFALWAYS:       if.end:
 ; TFALWAYS-NEXT:    [[TMP3:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ]
-; TFALWAYS-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT:    store i64 [[TMP3]], i64* [[ARRAYIDX1]], align 8
+; TFALWAYS-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT:    store i64 [[TMP3]], ptr [[ARRAYIDX1]], align 8
 ; TFALWAYS-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFALWAYS-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFALWAYS-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -276,8 +272,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 {
 ; TFFALLBACK-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFFALLBACK:       for.body:
 ; TFFALLBACK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; TFFALLBACK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; TFFALLBACK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
 ; TFFALLBACK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50
 ; TFFALLBACK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; TFFALLBACK:       if.then:
@@ -288,8 +284,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 {
 ; TFFALLBACK-NEXT:    br label [[IF_END]]
 ; TFFALLBACK:       if.end:
 ; TFFALLBACK-NEXT:    [[TMP3:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ]
-; TFFALLBACK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT:    store i64 [[TMP3]], i64* [[ARRAYIDX1]], align 8
+; TFFALLBACK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT:    store i64 [[TMP3]], ptr [[ARRAYIDX1]], align 8
 ; TFFALLBACK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFFALLBACK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFFALLBACK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -301,8 +297,8 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  %0 = load i64, ptr %arrayidx, align 8
   %cmp = icmp ugt i64 %0, 50
   br i1 %cmp, label %if.then, label %if.else
 
@@ -316,8 +312,8 @@ if.else:
 
 if.end:
   %3 = phi i64 [%1, %if.then], [%2, %if.else]
-  %arrayidx1 = getelementptr inbounds i64, i64* %b, i64 %indvars.iv
-  store i64 %3, i64* %arrayidx1, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %indvars.iv
+  store i64 %3, ptr %arrayidx1, align 8
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1024
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -329,7 +325,7 @@ for.cond.cleanup:
 ; A call whose argument must be widened, where the vector variant does not have
 ; a mask. Forcing tail folding results in no vectorized call, whereas an
 ; unpredicated body with scalar tail can use the unmasked variant.
-define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
+define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen_nomask(
 ; TFNONE-NEXT:  entry:
 ; TFNONE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -344,13 +340,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
 ; TFNONE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TFNONE:       vector.body:
 ; TFNONE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFNONE-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <vscale x 2 x i64>*
-; TFNONE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP5]], align 4
+; TFNONE-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP4]], align 4
 ; TFNONE-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
-; TFNONE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT:    [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; TFNONE-NEXT:    store <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64>* [[TMP8]], align 4
+; TFNONE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT:    store <vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], align 4
 ; TFNONE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 2
 ; TFNONE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -364,11 +358,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
 ; TFNONE-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFNONE:       for.body:
 ; TFNONE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFNONE-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT:    [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFNONE-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
 ; TFNONE-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR4:[0-9]+]]
-; TFNONE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT:    store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFNONE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
 ; TFNONE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFNONE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFNONE-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -380,11 +374,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
 ; TFALWAYS-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFALWAYS:       for.body:
 ; TFALWAYS-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFALWAYS-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT:    [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFALWAYS-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
 ; TFALWAYS-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR3:[0-9]+]]
-; TFALWAYS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT:    store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFALWAYS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
 ; TFALWAYS-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFALWAYS-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFALWAYS-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -405,13 +399,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
 ; TFFALLBACK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TFFALLBACK:       vector.body:
 ; TFFALLBACK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFFALLBACK-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFFALLBACK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <vscale x 2 x i64>*
-; TFFALLBACK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP5]], align 4
+; TFFALLBACK-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFFALLBACK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP4]], align 4
 ; TFFALLBACK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
-; TFFALLBACK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFFALLBACK-NEXT:    [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; TFFALLBACK-NEXT:    store <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64>* [[TMP8]], align 4
+; TFFALLBACK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFFALLBACK-NEXT:    store <vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], align 4
 ; TFFALLBACK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; TFFALLBACK-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 2
 ; TFFALLBACK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -425,11 +417,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
 ; TFFALLBACK-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFFALLBACK:       for.body:
 ; TFFALLBACK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFFALLBACK-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT:    [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFFALLBACK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
 ; TFFALLBACK-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR4:[0-9]+]]
-; TFFALLBACK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT:    store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFFALLBACK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
 ; TFFALLBACK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFFALLBACK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFFALLBACK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -441,11 +433,11 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %gep = getelementptr i64, i64* %b, i64 %indvars.iv
-  %load = load i64, i64* %gep
+  %gep = getelementptr i64, ptr %b, i64 %indvars.iv
+  %load = load i64, ptr %gep
   %call = call i64 @foo(i64 %load) #2
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
-  store i64 %call, i64* %arrayidx
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  store i64 %call, ptr %arrayidx
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1024
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -457,7 +449,7 @@ for.cond.cleanup:
 ; If both masked and unmasked options are present, we expect to see tail folding
 ; use the masked version and unpredicated body with scalar tail use the unmasked
 ; version.
-define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
+define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen_optmask(
 ; TFNONE-NEXT:  entry:
 ; TFNONE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -472,13 +464,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
 ; TFNONE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TFNONE:       vector.body:
 ; TFNONE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFNONE-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <vscale x 2 x i64>*
-; TFNONE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP5]], align 4
+; TFNONE-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP4]], align 4
 ; TFNONE-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
-; TFNONE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT:    [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; TFNONE-NEXT:    store <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64>* [[TMP8]], align 4
+; TFNONE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT:    store <vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], align 4
 ; TFNONE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 2
 ; TFNONE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -492,11 +482,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
 ; TFNONE-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFNONE:       for.body:
 ; TFNONE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFNONE-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT:    [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFNONE-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
 ; TFNONE-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]]
-; TFNONE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT:    store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFNONE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
 ; TFNONE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFNONE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFNONE-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -508,11 +498,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
 ; TFALWAYS-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFALWAYS:       for.body:
 ; TFALWAYS-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFALWAYS-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT:    [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFALWAYS-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
 ; TFALWAYS-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR4:[0-9]+]]
-; TFALWAYS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT:    store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFALWAYS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
 ; TFALWAYS-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFALWAYS-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFALWAYS-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -533,13 +523,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
 ; TFFALLBACK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TFFALLBACK:       vector.body:
 ; TFFALLBACK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFFALLBACK-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFFALLBACK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <vscale x 2 x i64>*
-; TFFALLBACK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP5]], align 4
+; TFFALLBACK-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFFALLBACK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP4]], align 4
 ; TFFALLBACK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
-; TFFALLBACK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFFALLBACK-NEXT:    [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; TFFALLBACK-NEXT:    store <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64>* [[TMP8]], align 4
+; TFFALLBACK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFFALLBACK-NEXT:    store <vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], align 4
 ; TFFALLBACK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; TFFALLBACK-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 2
 ; TFFALLBACK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -553,11 +541,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
 ; TFFALLBACK-NEXT:    br label [[FOR_BODY:%.*]]
 ; TFFALLBACK:       for.body:
 ; TFFALLBACK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFFALLBACK-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT:    [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFFALLBACK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
 ; TFFALLBACK-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]]
-; TFFALLBACK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT:    store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFFALLBACK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
 ; TFFALLBACK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; TFFALLBACK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; TFFALLBACK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -569,11 +557,11 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %gep = getelementptr i64, i64* %b, i64 %indvars.iv
-  %load = load i64, i64* %gep
+  %gep = getelementptr i64, ptr %b, i64 %indvars.iv
+  %load = load i64, ptr %gep
   %call = call i64 @foo(i64 %load) #3
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
-  store i64 %call, i64* %arrayidx
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  store i64 %call, ptr %arrayidx
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1024
   br i1 %exitcond, label %for.cond.cleanup, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
index 04c0f2d1b2157..6f6ce7249d411 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
@@ -5,12 +5,12 @@
 target triple = "aarch64-unknown-linux-gnu"
 
 ; CHECK-COST: Checking a loop in 'fixed_width'
-; CHECK-COST: Found an estimated cost of 11 for VF 2 For instruction:   store i32 2, i32* %arrayidx1, align 4
-; CHECK-COST: Found an estimated cost of 25 for VF 4 For instruction:   store i32 2, i32* %arrayidx1, align 4
+; CHECK-COST: Found an estimated cost of 11 for VF 2 For instruction:   store i32 2, ptr %arrayidx1, align 4
+; CHECK-COST: Found an estimated cost of 25 for VF 4 For instruction:   store i32 2, ptr %arrayidx1, align 4
 ; CHECK-COST: Selecting VF: 1.
 
 ; We should decide this loop is not worth vectorising using fixed width vectors
-define void @fixed_width(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 {
+define void @fixed_width(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-LABEL: @fixed_width(
 ; CHECK-NOT: vector.body
 entry:
@@ -28,14 +28,14 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 
 for.body:                                         ; preds = %for.body.preheader, %for.inc
   %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.07
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.07
+  %0 = load i32, ptr %arrayidx, align 4
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %for.inc, label %if.then
 
 if.then:                                          ; preds = %for.body
-  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.07
-  store i32 2, i32* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.07
+  store i32 2, ptr %arrayidx1, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -46,12 +46,12 @@ for.inc:                                          ; preds = %for.body, %if.then
 
 
 ; CHECK-COST: Checking a loop in 'scalable'
-; CHECK-COST: Found an estimated cost of 1 for VF vscale x 4 For instruction:   store i32 2, i32* %arrayidx1, align 4
+; CHECK-COST: Found an estimated cost of 1 for VF vscale x 4 For instruction:   store i32 2, ptr %arrayidx1, align 4
 
-define void @scalable(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 {
+define void @scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-LABEL: @scalable(
 ; CHECK: vector.body
-; CHECK: call void @llvm.masked.store.nxv4i32.p0nxv4i32
+; CHECK: call void @llvm.masked.store.nxv4i32.p0
 entry:
   %cmp6 = icmp sgt i64 %n, 0
   br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
@@ -67,14 +67,14 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 
 for.body:                                         ; preds = %for.body.preheader, %for.inc
   %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.07
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.07
+  %0 = load i32, ptr %arrayidx, align 4
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %for.inc, label %if.then
 
 if.then:                                          ; preds = %for.body
-  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.07
-  store i32 2, i32* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.07
+  store i32 2, ptr %arrayidx1, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll
index 85f9e6d345cff..3550d401dc51a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll
@@ -32,21 +32,21 @@ target triple = "aarch64--linux-gnu"
 ; CHECK: load <4 x i32>
 ; CHECK: store <4 x i32>
 
-define void @max_vf(%struct.pair* noalias nocapture %p) {
+define void @max_vf(ptr noalias nocapture %p) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %0 = add nuw nsw i64 %i, 2
-  %p_i.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 0
-  %p_i_plus_2.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 0
-  %1 = load i32, i32* %p_i.x, align 4
-  store i32 %1, i32* %p_i_plus_2.x, align 4
-  %p_i.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 1
-  %p_i_plus_2.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 1
-  %2 = load i32, i32* %p_i.y, align 4
-  store i32 %2, i32* %p_i_plus_2.y, align 4
+  %p_i.x = getelementptr inbounds %struct.pair, ptr %p, i64 %i, i32 0
+  %p_i_plus_2.x = getelementptr inbounds %struct.pair, ptr %p, i64 %0, i32 0
+  %1 = load i32, ptr %p_i.x, align 4
+  store i32 %1, ptr %p_i_plus_2.x, align 4
+  %p_i.y = getelementptr inbounds %struct.pair, ptr %p, i64 %i, i32 1
+  %p_i_plus_2.y = getelementptr inbounds %struct.pair, ptr %p, i64 %0, i32 1
+  %2 = load i32, ptr %p_i.y, align 4
+  store i32 %2, ptr %p_i_plus_2.y, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp eq i64 %i.next, 1000
   br i1 %cond, label %for.exit, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
index 03cb59b4fd476..785241d342ddc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
@@ -9,14 +9,14 @@ target triple = "aarch64--linux-gnu"
 ; CHECK:       LV: Found an estimated cost of 1 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2
 ; CHECK:       LV: Not considering vector loop of width 2 because it will not generate any vector instructions
 ;
-define void @all_scalar(i64* %a, i64 %n) {
+define void @all_scalar(ptr %a, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr i64, i64* %a, i64 %i
-  store i64 0, i64* %tmp0, align 1
+  %tmp0 = getelementptr i64, ptr %a, i64 %i
+  store i64 0, ptr %tmp0, align 1
   %i.next = add nuw nsw i64 %i, 2
   %cond = icmp eq i64 %i.next, %n
   br i1 %cond, label %for.end, label %for.body
@@ -30,15 +30,15 @@ for.end:
 ; CHECK:       LV: Found an estimated cost of 0 for VF 8 For instruction: %i.next = zext i32 %j.next to i64
 ; CHECK:       LV: Not considering vector loop of width 8 because it will not generate any vector instructions
 %struct.a = type { i32, i8 }
-define void @PR33193(%struct.a* %a, i64 %n) {
+define void @PR33193(ptr %a, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %j = phi i32 [ 0, %entry ], [ %j.next, %for.body ]
-  %tmp0 = getelementptr inbounds %struct.a, %struct.a* %a, i64 %i, i32 1
-  store i8 0, i8* %tmp0, align 4
+  %tmp0 = getelementptr inbounds %struct.a, ptr %a, i64 %i, i32 1
+  store i8 0, ptr %tmp0, align 4
   %j.next = add i32 %j, 1
   %i.next = zext i32 %j.next to i64
   %cond = icmp ugt i64 %n, %i.next

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
index f6228fcfece78..80410a44e9e67 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
@@ -24,17 +24,17 @@
 ; CHECK-LABEL: vector.body:
 ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
 ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
-; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, <4 x i64> %[[VecInd]]
+; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]]
 ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
-; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[VecIndTr]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 ; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
 ; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]]
 ; CHECK: br label %[[InnerLoop:.+]]
 
 ; CHECK: [[InnerLoop]]:
 ; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]
-; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]
-; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[StoreVal]], <4 x i32*> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true
+; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true
 ; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1>
 ; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8>
 ; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0
@@ -59,17 +59,17 @@ entry:
 
 for.body:                                         ; preds = %for.inc8, %entry
   %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
-  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, i64 %indvars.iv21
+  %arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %indvars.iv21
   %0 = trunc i64 %indvars.iv21 to i32
-  store i32 %0, i32* %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4
   %1 = trunc i64 %indvars.iv21 to i32
   %add = add nsw i32 %1, %n
   br label %for.body3
 
 for.body3:                                        ; preds = %for.body3, %for.body
   %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
-  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
-  store i32 %add, i32* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+  store i32 %add, ptr %arrayidx7, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 8
   br i1 %exitcond, label %for.inc8, label %for.body3
@@ -91,15 +91,15 @@ for.end10:                                        ; preds = %for.inc8
 ; CHECK-LABEL: vector.body:
 ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
 ; CHECK: %[[VecInd:.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
-; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i64], [8 x i64]* @arrX, i64 0, <2 x i64> %[[VecInd]]
-; CHECK: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %[[VecInd]], <2 x i64*> %[[AAddr]], i32 4, <2 x i1> <i1 true, i1 true>)
+; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i64], ptr @arrX, i64 0, <2 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[VecInd]], <2 x ptr> %[[AAddr]], i32 4, <2 x i1> <i1 true, i1 true>)
 ; CHECK: %[[StoreVal:.*]] = add nsw <2 x i64> %[[VecInd]], %[[Splat]]
 ; CHECK: br label %[[InnerLoop:.+]]
 
 ; CHECK: [[InnerLoop]]:
 ; CHECK: %[[InnerPhi:.*]] = phi <2 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]
-; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arrY, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]]
-; CHECK: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %[[StoreVal]], <2 x i64*> %[[AAddr2]], i32 4, <2 x i1> <i1 true, i1 true>
+; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i64]], ptr @arrY, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[StoreVal]], <2 x ptr> %[[AAddr2]], i32 4, <2 x i1> <i1 true, i1 true>
 ; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], <i64 1, i64 1>
 ; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], <i64 8, i64 8>
 ; CHECK: %[[InnerCond:.*]] = extractelement <2 x i1> %[[VecCond]], i32 0
@@ -117,15 +117,15 @@ entry:
 
 for.body:                                         ; preds = %for.inc8, %entry
   %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
-  %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* @arrX, i64 0, i64 %indvars.iv21
-  store i64 %indvars.iv21, i64* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [8 x i64], ptr @arrX, i64 0, i64 %indvars.iv21
+  store i64 %indvars.iv21, ptr %arrayidx, align 4
   %add = add nsw i64 %indvars.iv21, %n
   br label %for.body3
 
 for.body3:                                        ; preds = %for.body3, %for.body
   %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
-  %arrayidx7 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arrY, i64 0, i64 %indvars.iv, i64 %indvars.iv21
-  store i64 %add, i64* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds [8 x [8 x i64]], ptr @arrY, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+  store i64 %add, ptr %arrayidx7, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 8
   br i1 %exitcond, label %for.inc8, label %for.body3

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll
index f86540a5ffa05..43d0e8f9b9c98 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll
@@ -7,32 +7,32 @@
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128-p:16:16-p4:32:16"
 
 ; Check that all the loads are scalarized
-; CHECK: load i16, i16*
-; CHECK: load i16, i16*
-; CHECK: load i16, i16 addrspace(4)*
-; CHECK: load i16, i16 addrspace(4)*
+; CHECK: load i16, ptr
+; CHECK: load i16, ptr
+; CHECK: load i16, ptr addrspace(4)
+; CHECK: load i16, ptr addrspace(4)
 ; CHECK: store <2 x i16>
 
 %rec1445 = type { i16, i16, i16, i16, i16 }
 
-define void @foo(%rec1445* %a, %rec1445 addrspace(4)* %b, i16* noalias %dst) {
+define void @foo(ptr %a, ptr addrspace(4) %b, ptr noalias %dst) {
 bb1:
   br label %bb4
 
 bb4:
   %tmp1 = phi i16 [ 0, %bb1 ], [ %_tmp1013, %bb4 ]
-  %tmp2 = phi %rec1445* [ %a, %bb1 ], [ %_tmp1015, %bb4 ]
-  %tmp3 = phi %rec1445 addrspace(4)* [ %b, %bb1 ], [ %_tmp1017, %bb4 ]
-  %0 = getelementptr %rec1445, %rec1445* %tmp2, i16 0, i32 1
-  %_tmp987 = load i16, i16* %0, align 1
-  %1 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 0, i32 1
-  %_tmp993 = load i16, i16 addrspace(4)* %1, align 1
+  %tmp2 = phi ptr [ %a, %bb1 ], [ %_tmp1015, %bb4 ]
+  %tmp3 = phi ptr addrspace(4) [ %b, %bb1 ], [ %_tmp1017, %bb4 ]
+  %0 = getelementptr %rec1445, ptr %tmp2, i16 0, i32 1
+  %_tmp987 = load i16, ptr %0, align 1
+  %1 = getelementptr %rec1445, ptr addrspace(4) %tmp3, i32 0, i32 1
+  %_tmp993 = load i16, ptr addrspace(4) %1, align 1
   %add = add i16 %_tmp987, %_tmp993
-  %dst.gep = getelementptr inbounds i16, i16* %dst, i16 %tmp1
-  store i16 %add, i16* %dst.gep
+  %dst.gep = getelementptr inbounds i16, ptr %dst, i16 %tmp1
+  store i16 %add, ptr %dst.gep
   %_tmp1013 = add i16 %tmp1, 1
-  %_tmp1015 = getelementptr %rec1445, %rec1445* %tmp2, i16 1
-  %_tmp1017 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 1
+  %_tmp1015 = getelementptr %rec1445, ptr %tmp2, i16 1
+  %_tmp1017 = getelementptr %rec1445, ptr addrspace(4) %tmp3, i32 1
   %_tmp1019 = icmp ult i16 %_tmp1013, 24
   br i1 %_tmp1019, label %bb4, label %bb16
 

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll
index bf93ec83a2fc7..20b536499afa7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll
@@ -3,19 +3,19 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-gnu"
 
 @b = common local_unnamed_addr global i32 0, align 4
-@a = common local_unnamed_addr global i16* null, align 8
+@a = common local_unnamed_addr global ptr null, align 8
 
 define i32 @fn1() local_unnamed_addr #0 {
 ; We expect the backend to expand all reductions.
 ; CHECK: @llvm.vector.reduce
 entry:
-  %0 = load i32, i32* @b, align 4, !tbaa !1
+  %0 = load i32, ptr @b, align 4, !tbaa !1
   %cmp40 = icmp sgt i32 %0, 0
   br i1 %cmp40, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:                                   ; preds = %entry
-  %1 = load i16*, i16** @a, align 8, !tbaa !5
-  %2 = load i32, i32* @b, align 4, !tbaa !1
+  %1 = load ptr, ptr @a, align 8, !tbaa !5
+  %2 = load i32, ptr @b, align 4, !tbaa !1
   %3 = sext i32 %2 to i64
   br label %for.body
 
@@ -23,8 +23,8 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %d.043 = phi i16 [ undef, %for.body.lr.ph ], [ %.sink28, %for.body ]
   %c.042 = phi i16 [ undef, %for.body.lr.ph ], [ %c.0., %for.body ]
-  %arrayidx = getelementptr inbounds i16, i16* %1, i64 %indvars.iv
-  %4 = load i16, i16* %arrayidx, align 2, !tbaa !7
+  %arrayidx = getelementptr inbounds i16, ptr %1, i64 %indvars.iv
+  %4 = load i16, ptr %arrayidx, align 2, !tbaa !7
   %cmp2 = icmp sgt i16 %c.042, %4
   %c.0. = select i1 %cmp2, i16 %c.042, i16 %4
   %cmp13 = icmp slt i16 %d.043, %4

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll
index e4e415a8b0806..44820e061211a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll
@@ -14,7 +14,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 define void @_Z1dv() local_unnamed_addr #0 {
 ; CHECK-LABEL: @_Z1dv(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @"_ZN3$_01aEv"(%struct.anon* nonnull @b)
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @"_ZN3$_01aEv"(ptr nonnull @b)
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[F_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD5:%.*]], [[FOR_COND_CLEANUP:%.*]] ]
@@ -36,16 +36,16 @@ define void @_Z1dv() local_unnamed_addr #0 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[CONV]], [[TMP1]]
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[ADD]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* @c, i64 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i8 [[TMP2]], i8* [[ARRAYIDX3]], align 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i8], ptr @c, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 [[TMP2]], ptr [[ARRAYIDX3]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
 ;
 entry:
-  %call = tail call i8* @"_ZN3$_01aEv"(%struct.anon* nonnull @b) #2
+  %call = tail call ptr @"_ZN3$_01aEv"(ptr nonnull @b) #2
   br label %for.cond
 
 for.cond:                                         ; preds = %for.cond.cleanup, %entry
@@ -72,13 +72,13 @@ for.body:                                         ; preds = %for.body, %for.body
   %1 = trunc i64 %indvars.iv to i32
   %add = add i32 %conv, %1
   %idxprom = zext i32 %add to i64
-  %arrayidx = getelementptr inbounds [6 x i8], [6 x i8]* @c, i64 0, i64 %idxprom
-  %2 = load i8, i8* %arrayidx, align 1
-  %arrayidx3 = getelementptr inbounds i8, i8* %call, i64 %indvars.iv
-  store i8 %2, i8* %arrayidx3, align 1
+  %arrayidx = getelementptr inbounds [6 x i8], ptr @c, i64 0, i64 %idxprom
+  %2 = load i8, ptr %arrayidx, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %call, i64 %indvars.iv
+  store i8 %2, ptr %arrayidx3, align 1
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 4
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
 }
 
-declare i8* @"_ZN3$_01aEv"(%struct.anon*) local_unnamed_addr #1
+declare ptr @"_ZN3$_01aEv"(ptr) local_unnamed_addr #1

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll
index 2a974e5c97ea7..ae5e7d0af5762 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll
@@ -3,9 +3,9 @@
 ; CHECK-LABEL: define void @test(
 ; CHECK: vector.body
 
-define void @test(i64* %dst, i32* %src) {
+define void @test(ptr %dst, ptr %src) {
 entry:
-  %l = load i32, i32* %src
+  %l = load i32, ptr %src
   br label %loop.ph
 
 loop.ph:
@@ -14,8 +14,8 @@ loop.ph:
 loop:
   %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ]
   %l.cast = sext i32 %l to i64
-  %dst.idx = getelementptr i64, i64* %dst, i64 %iv
-  store i64 %l.cast, i64* %dst.idx
+  %dst.idx = getelementptr i64, ptr %dst, i64 %iv
+  store i64 %l.cast, ptr %dst.idx
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp9.us = icmp ult i64 %iv.next, 20
   br i1 %cmp9.us, label %loop, label %exit

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll
index 32c0076e40d33..ff544df55286c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll
@@ -21,17 +21,17 @@ target triple = "aarch64--linux-gnu"
 ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3
 ; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3
 ;
-define i32 @predicated_udiv(i32* %a, i32* %b, i1 %c, i64 %n) {
+define i32 @predicated_udiv(ptr %a, ptr %b, i1 %c, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
   %r = phi i32 [ 0, %entry ], [ %tmp6, %for.inc ]
-  %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
-  %tmp1 = getelementptr inbounds i32, i32* %b, i64 %i
-  %tmp2 = load i32, i32* %tmp0, align 4
-  %tmp3 = load i32, i32* %tmp1, align 4
+  %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i
+  %tmp1 = getelementptr inbounds i32, ptr %b, i64 %i
+  %tmp2 = load i32, ptr %tmp0, align 4
+  %tmp3 = load i32, ptr %tmp1, align 4
   br i1 %c, label %if.then, label %for.inc
 
 if.then:
@@ -59,22 +59,22 @@ for.end:
 ; Cost of store:
 ;   (store(4) + extractelement(3)) / 2 = 3
 ;
-; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4
-; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4
+; CHECK: Scalarizing and predicating: store i32 %tmp2, ptr %tmp0, align 4
+; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4
 ;
-define void @predicated_store(i32* %a, i1 %c, i32 %x, i64 %n) {
+define void @predicated_store(ptr %a, i1 %c, i32 %x, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
-  %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
-  %tmp1 = load i32, i32* %tmp0, align 4
+  %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i
+  %tmp1 = load i32, ptr %tmp0, align 4
   %tmp2 = add nsw i32 %tmp1, %x
   br i1 %c, label %if.then, label %for.inc
 
 if.then:
-  store i32 %tmp2, i32* %tmp0, align 4
+  store i32 %tmp2, ptr %tmp0, align 4
   br label %for.inc
 
 for.inc:
@@ -90,31 +90,31 @@ for.end:
 ;
 ; Same as predicate_store except we use a pointer PHI to maintain the address
 ;
-; CHECK: Found scalar instruction:   %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ]
-; CHECK: Found scalar instruction:   %addr.next = getelementptr inbounds i32, i32* %addr, i64 1
-; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %addr, align 4
-; CHECK: Found an estimated cost of 0 for VF 2 For instruction:   %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ]
-; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, i32* %addr, align 4
+; CHECK: Found scalar instruction:   %addr = phi ptr [ %a, %entry ], [ %addr.next, %for.inc ]
+; CHECK: Found scalar instruction:   %addr.next = getelementptr inbounds i32, ptr %addr, i64 1
+; CHECK: Scalarizing and predicating: store i32 %tmp2, ptr %addr, align 4
+; CHECK: Found an estimated cost of 0 for VF 2 For instruction:   %addr = phi ptr [ %a, %entry ], [ %addr.next, %for.inc ]
+; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, ptr %addr, align 4
 ;
-define void @predicated_store_phi(i32* %a, i1 %c, i32 %x, i64 %n) {
+define void @predicated_store_phi(ptr %a, i1 %c, i32 %x, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
-  %addr = phi i32 * [ %a, %entry ], [ %addr.next, %for.inc ]
-  %tmp1 = load i32, i32* %addr, align 4
+  %addr = phi ptr [ %a, %entry ], [ %addr.next, %for.inc ]
+  %tmp1 = load i32, ptr %addr, align 4
   %tmp2 = add nsw i32 %tmp1, %x
   br i1 %c, label %if.then, label %for.inc
 
 if.then:
-  store i32 %tmp2, i32* %addr, align 4
+  store i32 %tmp2, ptr %addr, align 4
   br label %for.inc
 
 for.inc:
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
-  %addr.next = getelementptr inbounds i32, i32* %addr, i64 1
+  %addr.next = getelementptr inbounds i32, ptr %addr, i64 1
   br i1 %cond, label %for.body, label %for.end
 
 for.end:
@@ -138,15 +138,15 @@ for.end:
 ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp3 = add nsw i32 %tmp2, %x
 ; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3
 ;
-define i32 @predicated_udiv_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) {
+define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
   %r = phi i32 [ 0, %entry ], [ %tmp6, %for.inc ]
-  %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
-  %tmp2 = load i32, i32* %tmp0, align 4
+  %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i
+  %tmp2 = load i32, ptr %tmp0, align 4
   br i1 %c, label %if.then, label %for.inc
 
 if.then:
@@ -179,23 +179,23 @@ for.end:
 ;   store(4) / 2 = 2
 ;
 ; CHECK: Scalarizing: %tmp2 = add nsw i32 %tmp1, %x
-; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4
+; CHECK: Scalarizing and predicating: store i32 %tmp2, ptr %tmp0, align 4
 ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = add nsw i32 %tmp1, %x
-; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4
+; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4
 ;
-define void @predicated_store_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) {
+define void @predicated_store_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
-  %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
-  %tmp1 = load i32, i32* %tmp0, align 4
+  %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i
+  %tmp1 = load i32, ptr %tmp0, align 4
   br i1 %c, label %if.then, label %for.inc
 
 if.then:
   %tmp2 = add nsw i32 %tmp1, %x
-  store i32 %tmp2, i32* %tmp0, align 4
+  store i32 %tmp2, ptr %tmp0, align 4
   br label %for.inc
 
 for.inc:
@@ -231,21 +231,21 @@ for.end:
 ; CHECK:     Scalarizing and predicating: %tmp3 = sdiv i32 %tmp1, %tmp2
 ; CHECK:     Scalarizing and predicating: %tmp4 = udiv i32 %tmp3, %tmp2
 ; CHECK:     Scalarizing: %tmp5 = sub i32 %tmp4, %x
-; CHECK:     Scalarizing and predicating: store i32 %tmp5, i32* %tmp0, align 4
+; CHECK:     Scalarizing and predicating: store i32 %tmp5, ptr %tmp0, align 4
 ; CHECK:     Found an estimated cost of 1 for VF 2 For instruction: %tmp2 = add i32 %tmp1, %x
 ; CHECK:     Found an estimated cost of 5 for VF 2 For instruction: %tmp3 = sdiv i32 %tmp1, %tmp2
 ; CHECK:     Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp3, %tmp2
 ; CHECK:     Found an estimated cost of 2 for VF 2 For instruction: %tmp5 = sub i32 %tmp4, %x
-; CHECK:     Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp5, i32* %tmp0, align 4
+; CHECK:     Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp5, ptr %tmp0, align 4
 ;
-define void @predication_multi_context(i32* %a, i1 %c, i32 %x, i64 %n) {
+define void @predication_multi_context(ptr %a, i1 %c, i32 %x, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
-  %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
-  %tmp1 = load i32, i32* %tmp0, align 4
+  %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i
+  %tmp1 = load i32, ptr %tmp0, align 4
   br i1 %c, label %if.then, label %for.inc
 
 if.then:
@@ -253,7 +253,7 @@ if.then:
   %tmp3 = sdiv i32 %tmp1, %tmp2
   %tmp4 = udiv i32 %tmp3, %tmp2
   %tmp5 = sub i32 %tmp4, %x
-  store i32 %tmp5, i32* %tmp0, align 4
+  store i32 %tmp5, ptr %tmp0, align 4
   br label %for.inc
 
 for.inc:

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
index fea917239c570..a60577b44d377 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
@@ -23,7 +23,7 @@ target triple = "aarch64--linux-gnu"
 ; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8>
 ; CHECK:   zext i8 [[Rdx]] to i32
 ;
-define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
+define i8 @reduction_i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
 entry:
   %cmp.12 = icmp sgt i32 %n, 0
   br i1 %cmp.12, label %for.body.preheader, label %for.cond.cleanup
@@ -43,11 +43,11 @@ for.cond.cleanup:
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %sum.013 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i32
-  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
-  %1 = load i8, i8* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
+  %1 = load i8, ptr %arrayidx2, align 1
   %conv3 = zext i8 %1 to i32
   %conv4 = and i32 %sum.013, 255
   %add = add nuw nsw i32 %conv, %conv4
@@ -78,7 +78,7 @@ for.body:
 ; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16>
 ; CHECK:   zext i16 [[Rdx]] to i32
 ;
-define i16 @reduction_i16_1(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %n) {
+define i16 @reduction_i16_1(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
 entry:
   %cmp.16 = icmp sgt i32 %n, 0
   br i1 %cmp.16, label %for.body.preheader, label %for.cond.cleanup
@@ -98,11 +98,11 @@ for.cond.cleanup:
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %sum.017 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i16, i16* %a, i64 %indvars.iv
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
+  %0 = load i16, ptr %arrayidx, align 2
   %conv.14 = zext i16 %0 to i32
-  %arrayidx2 = getelementptr inbounds i16, i16* %b, i64 %indvars.iv
-  %1 = load i16, i16* %arrayidx2, align 2
+  %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
+  %1 = load i16, ptr %arrayidx2, align 2
   %conv3.15 = zext i16 %1 to i32
   %conv4.13 = and i32 %sum.017, 65535
   %add = add nuw nsw i32 %conv.14, %conv4.13
@@ -135,7 +135,7 @@ for.body:
 ; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16>
 ; CHECK:   zext i16 [[Rdx]] to i32
 ;
-define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
+define i16 @reduction_i16_2(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
 entry:
   %cmp.14 = icmp sgt i32 %n, 0
   br i1 %cmp.14, label %for.body.preheader, label %for.cond.cleanup
@@ -155,11 +155,11 @@ for.cond.cleanup:
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %sum.015 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i32
-  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
-  %1 = load i8, i8* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
+  %1 = load i8, ptr %arrayidx2, align 1
   %conv3 = zext i8 %1 to i32
   %conv4.13 = and i32 %sum.015, 65535
   %add = add nuw nsw i32 %conv, %conv4.13

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
index 71b77540f47f0..4dd65220e2bc2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
@@ -10,23 +10,23 @@
 ; CHECK-LABEL: define {{.*}} @test_tc_too_small
 ; CHECK-NOT: vector.memcheck
 ; CHECK-NOT: vector.body
-define void @test_tc_too_small(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) {
+define void @test_tc_too_small(ptr %ptr.1, ptr %ptr.2, ptr %ptr.3, ptr %ptr.4, i64 %off.1, i64 %off.2) {
 entry:
   br label %loop
 
 loop:                                             ; preds = %bb54, %bb37
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv
-  %lv.1 = load i16, i16* %gep.1, align 2
+  %gep.1 = getelementptr inbounds i16, ptr %ptr.1, i64 %iv
+  %lv.1 = load i16, ptr %gep.1, align 2
   %ext.1 = sext i16 %lv.1 to i32
-  %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv
-  %lv.2 = load i16, i16* %gep.2, align 2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr.2, i64 %iv
+  %lv.2 = load i16, ptr %gep.2, align 2
   %ext.2 = sext i16 %lv.2 to i32
-  %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1
-  %lv.3 = load i16, i16* %gep.off.1, align 2
+  %gep.off.1 = getelementptr inbounds i16, ptr %gep.2, i64 %off.1
+  %lv.3 = load i16, ptr %gep.off.1, align 2
   %ext.3 = sext i16 %lv.3 to i32
-  %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2
-  %lv.4 = load i16, i16* %gep.off.2, align 2
+  %gep.off.2 = getelementptr inbounds i16, ptr %gep.2, i64 %off.2
+  %lv.4 = load i16, ptr %gep.off.2, align 2
   %ext.4 = sext i16 %lv.4 to i32
   %tmp62 = mul nsw i32 %ext.2, 11
   %tmp66 = mul nsw i32 %ext.3, -4
@@ -45,11 +45,11 @@ loop:                                             ; preds = %bb54, %bb37
   %tmp82 = sub nsw i32 %tmp81, %ext.1
   %tmp83 = lshr i32 %tmp82, 1
   %trunc.1 = trunc i32 %tmp75 to i16
-  %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv
-  store i16 %trunc.1, i16* %gep.3, align 2
+  %gep.3 = getelementptr inbounds i16, ptr %ptr.3, i64 %iv
+  store i16 %trunc.1, ptr %gep.3, align 2
   %trunc.2 = trunc i32 %tmp83 to i16
-  %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv
-  store i16 %trunc.2, i16* %gep.4, align 2
+  %gep.4 = getelementptr inbounds i16, ptr %ptr.4, i64 %iv
+  store i16 %trunc.2, ptr %gep.4, align 2
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp = icmp ult i64 %iv, 10
   br i1 %cmp, label %loop, label %exit
@@ -65,23 +65,23 @@ exit:
 ; THRESHOLD-NOT: vector.memcheck
 ; THRESHOLD-NOT: vector.body
 ;
-define void @test_tc_big_enough(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) {
+define void @test_tc_big_enough(ptr %ptr.1, ptr %ptr.2, ptr %ptr.3, ptr %ptr.4, i64 %off.1, i64 %off.2) {
 entry:
   br label %loop
 
 loop:                                             ; preds = %bb54, %bb37
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv
-  %lv.1 = load i16, i16* %gep.1, align 2
+  %gep.1 = getelementptr inbounds i16, ptr %ptr.1, i64 %iv
+  %lv.1 = load i16, ptr %gep.1, align 2
   %ext.1 = sext i16 %lv.1 to i32
-  %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv
-  %lv.2 = load i16, i16* %gep.2, align 2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr.2, i64 %iv
+  %lv.2 = load i16, ptr %gep.2, align 2
   %ext.2 = sext i16 %lv.2 to i32
-  %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1
-  %lv.3 = load i16, i16* %gep.off.1, align 2
+  %gep.off.1 = getelementptr inbounds i16, ptr %gep.2, i64 %off.1
+  %lv.3 = load i16, ptr %gep.off.1, align 2
   %ext.3 = sext i16 %lv.3 to i32
-  %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2
-  %lv.4 = load i16, i16* %gep.off.2, align 2
+  %gep.off.2 = getelementptr inbounds i16, ptr %gep.2, i64 %off.2
+  %lv.4 = load i16, ptr %gep.off.2, align 2
   %ext.4 = sext i16 %lv.4 to i32
   %tmp62 = mul nsw i32 %ext.2, 11
   %tmp66 = mul nsw i32 %ext.3, -4
@@ -100,11 +100,11 @@ loop:                                             ; preds = %bb54, %bb37
   %tmp82 = sub nsw i32 %tmp81, %ext.1
   %tmp83 = lshr i32 %tmp82, 1
   %trunc.1 = trunc i32 %tmp75 to i16
-  %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv
-  store i16 %trunc.1, i16* %gep.3, align 2
+  %gep.3 = getelementptr inbounds i16, ptr %ptr.3, i64 %iv
+  store i16 %trunc.1, ptr %gep.3, align 2
   %trunc.2 = trunc i32 %tmp83 to i16
-  %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv
-  store i16 %trunc.2, i16* %gep.4, align 2
+  %gep.4 = getelementptr inbounds i16, ptr %ptr.4, i64 %iv
+  store i16 %trunc.2, ptr %gep.4, align 2
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp = icmp ult i64 %iv, 500
   br i1 %cmp, label %loop, label %exit
@@ -113,7 +113,7 @@ exit:
   ret void
 }
 
-define void @test_tc_unknown(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2, i64 %N) {
+define void @test_tc_unknown(ptr %ptr.1, ptr %ptr.2, ptr %ptr.3, ptr %ptr.4, i64 %off.1, i64 %off.2, i64 %N) {
 ; CHECK-LABEL: define void @test_tc_unknown
 ; DEFAULT:       [[ADD:%.+]] = add i64 %N, 1
 ; DEFAULT-NEXT:  [[C:%.+]] = icmp ult i64 [[ADD]], 16
@@ -126,17 +126,17 @@ entry:
 
 loop:                                             ; preds = %bb54, %bb37
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv
-  %lv.1 = load i16, i16* %gep.1, align 2
+  %gep.1 = getelementptr inbounds i16, ptr %ptr.1, i64 %iv
+  %lv.1 = load i16, ptr %gep.1, align 2
   %ext.1 = sext i16 %lv.1 to i32
-  %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv
-  %lv.2 = load i16, i16* %gep.2, align 2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr.2, i64 %iv
+  %lv.2 = load i16, ptr %gep.2, align 2
   %ext.2 = sext i16 %lv.2 to i32
-  %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1
-  %lv.3 = load i16, i16* %gep.off.1, align 2
+  %gep.off.1 = getelementptr inbounds i16, ptr %gep.2, i64 %off.1
+  %lv.3 = load i16, ptr %gep.off.1, align 2
   %ext.3 = sext i16 %lv.3 to i32
-  %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2
-  %lv.4 = load i16, i16* %gep.off.2, align 2
+  %gep.off.2 = getelementptr inbounds i16, ptr %gep.2, i64 %off.2
+  %lv.4 = load i16, ptr %gep.off.2, align 2
   %ext.4 = sext i16 %lv.4 to i32
   %tmp62 = mul nsw i32 %ext.2, 11
   %tmp66 = mul nsw i32 %ext.3, -4
@@ -155,11 +155,11 @@ loop:                                             ; preds = %bb54, %bb37
   %tmp82 = sub nsw i32 %tmp81, %ext.1
   %tmp83 = lshr i32 %tmp82, 1
   %trunc.1 = trunc i32 %tmp75 to i16
-  %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv
-  store i16 %trunc.1, i16* %gep.3, align 2
+  %gep.3 = getelementptr inbounds i16, ptr %ptr.3, i64 %iv
+  store i16 %trunc.1, ptr %gep.3, align 2
   %trunc.2 = trunc i32 %tmp83 to i16
-  %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv
-  store i16 %trunc.2, i16* %gep.4, align 2
+  %gep.4 = getelementptr inbounds i16, ptr %ptr.4, i64 %iv
+  store i16 %trunc.2, ptr %gep.4, align 2
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp = icmp ult i64 %iv, %N
   br i1 %cmp, label %loop, label %exit

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll
index b0015380a8874..b66bb948a47a5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll
@@ -4,7 +4,7 @@
 ; CHECK-REMARKS: UserVF ignored because of invalid costs.
 ; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): alloca
 ; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
-define void @alloca(i32** %vla, i64 %N) {
+define void @alloca(ptr %vla, i64 %N) {
 ; CHECK-LABEL: @alloca(
 ; CHECK-NOT: <vscale x
 
@@ -14,18 +14,18 @@ entry:
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
   %alloca = alloca i32, align 16
-  %arrayidx = getelementptr inbounds i32*, i32** %vla, i64 %iv
-  store i32* %alloca, i32** %arrayidx, align 8
+  %arrayidx = getelementptr inbounds ptr, ptr %vla, i64 %iv
+  store ptr %alloca, ptr %arrayidx, align 8
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
 
 for.end:
-  call void @foo(i32** nonnull %vla)
+  call void @foo(ptr nonnull %vla)
   ret void
 }
 
-declare void @foo(i32**)
+declare void @foo(ptr)
 
 !0 = !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
index 3e4a7987e1bea..37d13374d1198 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -5,10 +5,10 @@
 ; RUN:     -prefer-predicate-over-epilogue=scalar-epilogue -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
 ; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
 
-define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
+define void @vec_load(i64 %N, ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-LABEL: @vec_load
 ; CHECK: vector.body:
-; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>*
+; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, ptr
 ; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> %[[LOAD]])
 entry:
   %cmp7 = icmp sgt i64 %N, 0
@@ -16,12 +16,12 @@ entry:
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %b, i64 %iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %b, i64 %iv
+  %0 = load double, ptr %arrayidx, align 8
   %1 = call double @foo(double %0) #0
   %add = fadd double %1, 1.000000e+00
-  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %iv
-  store double %add, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %iv
+  store double %add, ptr %arrayidx2, align 8
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
@@ -30,7 +30,7 @@ for.end:                                 ; preds = %for.body, %entry
   ret void
 }
 
-define void @vec_scalar(i64 %N, double* nocapture %a) {
+define void @vec_scalar(i64 %N, ptr nocapture %a) {
 ; CHECK-LABEL: @vec_scalar
 ; CHECK: vector.body:
 ; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+01, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
@@ -42,8 +42,8 @@ for.body:                                         ; preds = %for.body.preheader,
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %0 = call double @foo(double 10.0) #0
   %sub = fsub double %0, 1.000000e+00
-  %arrayidx = getelementptr inbounds double, double* %a, i64 %iv
-  store double %sub, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv
+  store double %sub, ptr %arrayidx, align 8
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
@@ -52,22 +52,22 @@ for.end:                                 ; preds = %for.body, %entry
   ret void
 }
 
-define void @vec_ptr(i64 %N, i64* noalias %a, i64** readnone %b) {
+define void @vec_ptr(i64 %N, ptr noalias %a, ptr readnone %b) {
 ; CHECK-LABEL: @vec_ptr
 ; CHECK: vector.body:
-; CHECK: %[[LOAD:.*]] = load <vscale x 2 x i64*>, <vscale x 2 x i64*>*
-; CHECK: call <vscale x 2 x i64> @bar_vec(<vscale x 2 x i64*> %[[LOAD]])
+; CHECK: %[[LOAD:.*]] = load <vscale x 2 x ptr>, ptr
+; CHECK: call <vscale x 2 x i64> @bar_vec(<vscale x 2 x ptr> %[[LOAD]])
 entry:
   %cmp7 = icmp sgt i64 %N, 0
   br i1 %cmp7, label %for.body, label %for.end
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %gep = getelementptr i64*, i64** %b, i64 %iv
-  %load = load i64*, i64** %gep
-  %call = call i64 @bar(i64* %load) #1
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv
-  store i64 %call, i64* %arrayidx
+  %gep = getelementptr ptr, ptr %b, i64 %iv
+  %load = load ptr, ptr %gep
+  %call = call i64 @bar(ptr %load) #1
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
+  store i64 %call, ptr %arrayidx
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1024
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
@@ -76,10 +76,10 @@ for.end:
   ret void
 }
 
-define void @vec_intrinsic(i64 %N, double* nocapture readonly %a) {
+define void @vec_intrinsic(i64 %N, ptr nocapture readonly %a) {
 ; CHECK-LABEL: @vec_intrinsic
 ; CHECK: vector.body:
-; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>*
+; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, ptr
 ; CHECK: call fast <vscale x 2 x double> @sin_vec_nxv2f64(<vscale x 2 x double> %[[LOAD]])
 entry:
   %cmp7 = icmp sgt i64 %N, 0
@@ -87,11 +87,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %a, i64 %iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv
+  %0 = load double, ptr %arrayidx, align 8
   %1 = call fast double @llvm.sin.f64(double %0) #2
   %add = fadd fast double %1, 1.000000e+00
-  store double %add, double* %arrayidx, align 8
+  store double %add, ptr %arrayidx, align 8
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %N
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
@@ -104,7 +104,7 @@ for.end:
 ; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
 ; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
 ; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
-define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
+define void @vec_sin_no_mapping(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %n) {
 ; CHECK: @vec_sin_no_mapping
 ; CHECK: call fast <2 x float> @llvm.sin.v2f32
 ; CHECK-NOT: <vscale x
@@ -113,11 +113,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
-  %0 = load float, float* %arrayidx, align 4, !dbg !11
+  %arrayidx = getelementptr inbounds float, ptr %src, i64 %i.07
+  %0 = load float, ptr %arrayidx, align 4, !dbg !11
   %1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
-  %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
-  store float %1, float* %arrayidx1, align 4, !dbg !13
+  %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07
+  store float %1, ptr %arrayidx1, align 4, !dbg !13
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -131,7 +131,7 @@ for.cond.cleanup:                                 ; preds = %for.body
 ; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
 ; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
 ; CHECK-REMARKS-NEXT: t.c:3:40: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
-define void @vec_sin_no_mapping_ite(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
+define void @vec_sin_no_mapping_ite(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %n) {
 ; CHECK: @vec_sin_no_mapping_ite
 ; CHECK-NOT: <vscale x
 ; CHECK: ret
@@ -140,8 +140,8 @@ entry:
 
 for.body:                                         ; preds = %entry, %if.end
   %i.07 = phi i64 [ %inc, %if.end ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
-  %0 = load float, float* %arrayidx, align 4, !dbg !11
+  %arrayidx = getelementptr inbounds float, ptr %src, i64 %i.07
+  %0 = load float, ptr %arrayidx, align 4, !dbg !11
   %cmp = fcmp ugt float %0, 0.0000
   br i1 %cmp, label %if.then, label %if.else
 if.then:
@@ -152,8 +152,8 @@ if.else:
   br label %if.end
 if.end:
   %3 = phi float [%1, %if.then], [%2, %if.else]
-  %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
-  store float %3, float* %arrayidx1, align 4, !dbg !14
+  %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07
+  store float %3, ptr %arrayidx1, align 4, !dbg !14
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -166,7 +166,7 @@ for.cond.cleanup:                                 ; preds = %for.body
 ; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
 ; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
 ; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
-define void @vec_sin_fixed_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
+define void @vec_sin_fixed_mapping(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %n) {
 ; CHECK: @vec_sin_fixed_mapping
 ; CHECK: call fast <2 x float> @llvm.sin.v2f32
 ; CHECK-NOT: <vscale x
@@ -175,11 +175,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
-  %0 = load float, float* %arrayidx, align 4, !dbg !11
+  %arrayidx = getelementptr inbounds float, ptr %src, i64 %i.07
+  %0 = load float, ptr %arrayidx, align 4, !dbg !11
   %1 = tail call fast float @llvm.sin.f32(float %0) #3, !dbg !12
-  %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
-  store float %1, float* %arrayidx1, align 4, !dbg !13
+  %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07
+  store float %1, ptr %arrayidx1, align 4, !dbg !13
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -191,7 +191,7 @@ for.cond.cleanup:                                 ; preds = %for.body
 ; Even though there are no function mappings attached to the call
 ; in the loop below, we can still vectorize the loop because SVE has
 ; hardware support in the form of the 'fsqrt' instruction.
-define void @vec_sqrt_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) #0 {
+define void @vec_sqrt_no_mapping(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %n) #0 {
 ; CHECK: @vec_sqrt_no_mapping
 ; CHECK: call fast <vscale x 2 x float> @llvm.sqrt.nxv2f32
 entry:
@@ -199,11 +199,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %src, i64 %i.07
+  %0 = load float, ptr %arrayidx, align 4
   %1 = tail call fast float @llvm.sqrt.f32(float %0)
-  %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
-  store float %1, float* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07
+  store float %1, ptr %arrayidx1, align 4
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -214,13 +214,13 @@ for.cond.cleanup:                                 ; preds = %for.body
 
 
 declare double @foo(double)
-declare i64 @bar(i64*)
+declare i64 @bar(ptr)
 declare double @llvm.sin.f64(double)
 declare float @llvm.sin.f32(float)
 declare float @llvm.sqrt.f32(float)
 
 declare <vscale x 2 x double> @foo_vec(<vscale x 2 x double>)
-declare <vscale x 2 x i64> @bar_vec(<vscale x 2 x i64*>)
+declare <vscale x 2 x i64> @bar_vec(<vscale x 2 x ptr>)
 declare <vscale x 2 x double> @sin_vec_nxv2f64(<vscale x 2 x double>)
 declare <2 x double> @sin_vec_v2f64(<2 x double>)
 

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll
index 4a6b1fb0c037a..8123651fcebff 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll
@@ -11,7 +11,7 @@ target triple = "aarch64-unknown-linux-gnu"
 ;      a[i] /= b[i];
 ;  }
 
-define void  @predication_in_loop(i32* %a, i32* %b, i32* %cond) #0 {
+define void  @predication_in_loop(ptr %a, ptr %b, ptr %cond) #0 {
 ; CHECK-LABEL: @predication_in_loop
 ; CHECK:  sdiv <vscale x 4 x i32>
 ;
@@ -23,18 +23,18 @@ for.cond.cleanup:                                 ; preds = %for.inc, %entry
 
 for.body:                                         ; preds = %entry, %for.inc
   %i.09 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %i.09
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %cond, i64 %i.09
+  %0 = load i32, ptr %arrayidx, align 4
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %for.inc, label %if.then
 
 if.then:                                          ; preds = %for.body
-  %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %i.09
-  %1 = load i32, i32* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.09
-  %2 = load i32, i32* %arrayidx2, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %i.09
+  %1 = load i32, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %i.09
+  %2 = load i32, ptr %arrayidx2, align 4
   %div = sdiv i32 %2, %1
-  store i32 %div, i32* %arrayidx2, align 4
+  store i32 %div, ptr %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -56,7 +56,7 @@ for.inc:                                          ; preds = %for.body, %if.then
 ; otherwise it would be able to vectorize, but will not because
 ; "Max legal vector width too small, scalable vectorization unfeasible.."
 
-define void @unpredicated_loop_predication_through_tailfolding(i32* %a, i32* %b) #0 {
+define void @unpredicated_loop_predication_through_tailfolding(ptr %a, ptr %b) #0 {
 ; CHECK-LABEL: @unpredicated_loop_predication_through_tailfolding
 ; CHECK-NOT:  sdiv <vscale x 4 x i32>
 
@@ -65,14 +65,14 @@ entry:
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %sdiv = sdiv i32 %1, %0
   %2 = add nuw nsw i64 %iv, 8
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %sdiv, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %sdiv, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll
index bb11b2df7d7f1..99895186bbcf7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll
@@ -1,30 +1,30 @@
 ; RUN: opt < %s -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
 ; RUN:   -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S | FileCheck %s
 
-define void @invariant_store_red_exit_is_phi(i32* %dst, i32* readonly %src, i64 %n) {
+define void @invariant_store_red_exit_is_phi(ptr %dst, ptr readonly %src, i64 %n) {
 ; CHECK-LABEL: @invariant_store_red_exit_is_phi(
 ; CHECK: vector.ph:
 ; CHECK:      %[[ACTIVE_LANE_MASK_ENTRY:.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 %n)
 ; CHECK: vector.body:
 ; CHECK:      %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1> [ %[[ACTIVE_LANE_MASK_ENTRY]], %vector.ph ], [ %[[ACTIVE_LANE_MASK_NEXT:.*]], %vector.body ]
 ; CHECK:      %[[VEC_PHI:.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %vector.ph ], [ %[[PREDPHI:.*]], %vector.body ]
-; CHECK:      %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32
+; CHECK:      %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0
 ; CHECK-NEXT: %[[ADD:.*]] = add <vscale x 4 x i32> %[[VEC_PHI]], %[[LOAD]]
 ; CHECK-NEXT: %[[SELECT:.*]] = select <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x i32> %[[ADD]], <vscale x 4 x i32> %[[VEC_PHI]]
 ; CHECK:      %[[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %{{.*}}, i64 %n)
 ; CHECK: middle.block:
 ; CHECK-NEXT: %[[SUM:.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %[[SELECT]])
-; CHECK-NEXT: store i32 %[[SUM]], i32* %dst, align 4
+; CHECK-NEXT: store i32 %[[SUM]], ptr %dst, align 4
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.inc
   %red = phi i32 [ 0, %entry ], [ %storemerge, %for.body ]
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx6 = getelementptr inbounds i32, i32* %src, i64 %indvars.iv
-  %load = load i32, i32* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
+  %load = load i32, ptr %arrayidx6, align 4
   %storemerge = add i32 %red, %load
-  store i32 %storemerge, i32* %dst, align 4
+  store i32 %storemerge, ptr %dst, align 4
   %indvars.iv.next = add nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !0

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index 381efbd5a52e0..fb8c7521906af 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -7,7 +7,7 @@
 ; ADD
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @add(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @add(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @add
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -23,8 +23,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi i32 [ 2, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add nsw i32 %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -37,7 +37,7 @@ for.end:                                 ; preds = %for.body, %entry
 ; OR
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @or(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @or
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -53,8 +53,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi i32 [ 2, %entry ], [ %or, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %or = or i32 %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -67,7 +67,7 @@ for.end:                                 ; preds = %for.body, %entry
 ; AND
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @and(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @and
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -83,8 +83,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi i32 [ 2, %entry ], [ %and, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %and = and i32 %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -97,7 +97,7 @@ for.end:                                 ; preds = %for.body, %entry
 ; XOR
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @xor(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @xor
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -113,8 +113,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi i32 [ 2, %entry ], [ %xor, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %xor = xor i32 %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -127,7 +127,7 @@ for.end:                                 ; preds = %for.body, %entry
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
 ; SMIN
 
-define i32 @smin(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @smin
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -146,8 +146,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp.i = icmp slt i32 %0, %sum.010
   %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010
   %iv.next = add nuw nsw i64 %iv, 1
@@ -161,7 +161,7 @@ for.end:
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
 ; UMAX
 
-define i32 @umax(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @umax
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -180,8 +180,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp.i = icmp ugt i32 %0, %sum.010
   %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010
   %iv.next = add nuw nsw i64 %iv, 1
@@ -195,7 +195,7 @@ for.end:
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
 ; FADD (FAST)
 
-define float @fadd_fast(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-LABEL: @fadd_fast
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
@@ -211,8 +211,8 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
   %add = fadd fast float %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -224,7 +224,7 @@ for.end:
 
 ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
 ; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2)
-define bfloat @fadd_fast_bfloat(bfloat* noalias nocapture readonly %a, i64 %n) {
+define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-LABEL: @fadd_fast_bfloat
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <8 x bfloat>
@@ -240,8 +240,8 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds bfloat, bfloat* %a, i64 %iv
-  %0 = load bfloat, bfloat* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
+  %0 = load bfloat, ptr %arrayidx, align 4
   %add = fadd fast bfloat %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -254,7 +254,7 @@ for.end:
 ; FMIN (FAST)
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define float @fmin_fast(float* noalias nocapture readonly %a, i64 %n) #0 {
+define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-LABEL: @fmin_fast
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
@@ -273,8 +273,8 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp.i = fcmp fast olt float %0, %sum.07
   %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
@@ -288,7 +288,7 @@ for.end:
 ; FMAX (FAST)
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define float @fmax_fast(float* noalias nocapture readonly %a, i64 %n) #0 {
+define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-LABEL: @fmax_fast
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
@@ -307,8 +307,8 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp.i = fcmp fast ogt float %0, %sum.07
   %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
@@ -322,7 +322,7 @@ for.end:
 ; ADD (with reduction stored in invariant address)
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 4, interleaved count: 2)
-define void @invariant_store(i32* %dst, i32* readonly %src) {
+define void @invariant_store(ptr %dst, ptr readonly %src) {
 ; CHECK-LABEL: @invariant_store
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 4 x i32>
@@ -332,18 +332,18 @@ define void @invariant_store(i32* %dst, i32* readonly %src) {
 ; CHECK: middle.block:
 ; CHECK: %[[ADD:.*]] = add <vscale x 4 x i32> %[[ADD2]], %[[ADD1]]
 ; CHECK-NEXT: %[[SUM:.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %[[ADD]])
-; CHECK-NEXT: store i32 %[[SUM]], i32* %gep.dst, align 4
+; CHECK-NEXT: store i32 %[[SUM]], ptr %gep.dst, align 4
 entry:
-  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
-  store i32 0, i32* %gep.dst, align 4
+  %gep.dst = getelementptr inbounds i32, ptr %dst, i64 42
+  store i32 0, ptr %gep.dst, align 4
   br label %for.body
 for.body:
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %gep.src = getelementptr inbounds i32, i32* %src, i64 %indvars.iv
-  %0 = load i32, i32* %gep.src, align 4
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
+  %0 = load i32, ptr %gep.src, align 4
   %add = add nsw i32 %sum, %0
-  store i32 %add, i32* %gep.dst, align 4
+  store i32 %add, ptr %gep.dst, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -358,7 +358,7 @@ for.cond.cleanup:
 
 ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
 ; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2)
-define i32 @mul(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @mul
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <4 x i32>
@@ -374,8 +374,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi i32 [ 2, %entry ], [ %mul, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %mul = mul nsw i32 %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -388,7 +388,7 @@ for.end:                                 ; preds = %for.body, %entry
 ; Note: This test was added to ensure we always check the legality of reductions (and emit a warning if necessary) before checking for memory dependencies
 ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
 ; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2)
-define i32 @memory_dependence(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) {
+define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @memory_dependence
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <4 x i32>
@@ -408,14 +408,14 @@ entry:
 for.body:
   %i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %sum = phi i32 [ %mul, %for.body ], [ 2, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %i
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %i
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %i
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %0
   %add2 = add nuw nsw i64 %i, 32
-  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %add2
-  store i32 %add, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %add2
+  store i32 %add, ptr %arrayidx3, align 4
   %mul = mul nsw i32 %1, %sum
   %inc = add nuw nsw i64 %i, 1
   %exitcond.not = icmp eq i64 %inc, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll
index 2a8f6ed3d7745..f28f77bf1b155 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll
@@ -30,20 +30,20 @@
 
 ; VF-4: <4 x i32>
 ; VF-VSCALE4: <16 x i32>
-define void @test0(i32* %a, i8* %b, i32* %c) #0 {
+define void @test0(ptr %a, ptr %b, ptr %c) #0 {
 entry:
   br label %loop
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
-  %1 = load i8, i8* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
+  %1 = load i8, ptr %arrayidx2, align 4
   %zext = zext i8 %1 to i32
   %add = add nsw i32 %zext, %0
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
index 3628ab05eba2d..e83eb729b521c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
@@ -6,7 +6,7 @@
 ; Test that the MaxVF for the following loop, that has no dependence distances,
 ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
 ; (maximized bandwidth for i8 in the loop).
-define void @test0(i32* %a, i8* %b, i32* %c) #0 {
+define void @test0(ptr %a, ptr %b, ptr %c) #0 {
 ; CHECK: LV: Checking a loop in 'test0'
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16
@@ -19,14 +19,14 @@ entry:
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
-  %1 = load i8, i8* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
+  %1 = load i8, ptr %arrayidx2, align 4
   %zext = zext i8 %1 to i32
   %add = add nsw i32 %zext, %0
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop
@@ -37,7 +37,7 @@ exit:
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 64 elements, is calculated as (maxvscale = 16) * 4.
-define void @test1(i32* %a, i8* %b) #0 {
+define void @test1(ptr %a, ptr %b) #0 {
 ; CHECK: LV: Checking a loop in 'test1'
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16
@@ -50,15 +50,15 @@ entry:
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
-  %1 = load i8, i8* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
+  %1 = load i8, ptr %arrayidx2, align 4
   %zext = zext i8 %1 to i32
   %add = add nsw i32 %zext, %0
   %2 = add nuw nsw i64 %iv, 64
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop
@@ -69,7 +69,7 @@ exit:
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 32 elements, is calculated as (maxvscale = 16) * 2.
-define void @test2(i32* %a, i8* %b) #0 {
+define void @test2(ptr %a, ptr %b) #0 {
 ; CHECK: LV: Checking a loop in 'test2'
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16
@@ -82,15 +82,15 @@ entry:
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
-  %1 = load i8, i8* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
+  %1 = load i8, ptr %arrayidx2, align 4
   %zext = zext i8 %1 to i32
   %add = add nsw i32 %zext, %0
   %2 = add nuw nsw i64 %iv, 32
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop
@@ -101,7 +101,7 @@ exit:
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 16 elements, is calculated as (maxvscale = 16) * 1.
-define void @test3(i32* %a, i8* %b) #0 {
+define void @test3(ptr %a, ptr %b) #0 {
 ; CHECK: LV: Checking a loop in 'test3'
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16
@@ -114,15 +114,15 @@ entry:
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
-  %1 = load i8, i8* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
+  %1 = load i8, ptr %arrayidx2, align 4
   %zext = zext i8 %1 to i32
   %add = add nsw i32 %zext, %0
   %2 = add nuw nsw i64 %iv, 16
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop
@@ -133,7 +133,7 @@ exit:
 
 ; Test the fallback mechanism when scalable vectors are not feasible due
 ; to e.g. dependence distance.
-define void @test4(i32* %a, i32* %b) #0 {
+define void @test4(ptr %a, ptr %b) #0 {
 ; CHECK: LV: Checking a loop in 'test4'
 ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
 ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
@@ -147,14 +147,14 @@ entry:
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %2 = add nuw nsw i64 %iv, 8
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
index b8a5d59fd42b0..47b159b1553b8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
@@ -45,20 +45,20 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test1
 ; CHECK: <4 x i32>
-define void @test1(i32* %a, i32* %b) #0 {
+define void @test1(ptr %a, ptr %b) #0 {
 entry:
   br label %loop
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %2 = add nuw nsw i64 %iv, 8
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0
@@ -90,20 +90,20 @@ exit:
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test2
 ; CHECK: <4 x i32>
-define void @test2(i32* %a, i32* %b) #0 {
+define void @test2(ptr %a, ptr %b) #0 {
 entry:
   br label %loop
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %2 = add nuw nsw i64 %iv, 4
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !3
@@ -138,20 +138,20 @@ exit:
 ; CHECK-DBG: LV: Using user VF vscale x 2.
 ; CHECK-LABEL: @test3
 ; CHECK: <vscale x 2 x i32>
-define void @test3(i32* %a, i32* %b) #0 {
+define void @test3(ptr %a, ptr %b) #0 {
 entry:
   br label %loop
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %2 = add nuw nsw i64 %iv, 32
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !6
@@ -190,20 +190,20 @@ exit:
 ; CHECK-DBG: LV: Selecting VF: vscale x 2.
 ; CHECK-LABEL: @test4
 ; CHECK: <vscale x 2 x i32>
-define void @test4(i32* %a, i32* %b) #0 {
+define void @test4(ptr %a, ptr %b) #0 {
 entry:
   br label %loop
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %2 = add nuw nsw i64 %iv, 32
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !9
@@ -238,20 +238,20 @@ exit:
 ; CHECK-DBG: LV: Using user VF vscale x 4
 ; CHECK-LABEL: @test5
 ; CHECK: <vscale x 4 x i32>
-define void @test5(i32* %a, i32* %b) #0 {
+define void @test5(ptr %a, ptr %b) #0 {
 entry:
   br label %loop
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %2 = add nuw nsw i64 %iv, 128
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !12
@@ -289,20 +289,20 @@ exit:
 ; CHECK-DBG: Selecting VF: vscale x 4.
 ; CHECK-LABEL: @test6
 ; CHECK: <vscale x 4 x i32>
-define void @test6(i32* %a, i32* %b) #0 {
+define void @test6(ptr %a, ptr %b) #0 {
 entry:
   br label %loop
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %2 = add nuw nsw i64 %iv, 128
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !15
@@ -322,18 +322,18 @@ exit:
 ; CHECK-NO-SVE-LABEL: @test_no_sve
 ; CHECK-NO-SVE: <4 x i32>
 ; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
-define void @test_no_sve(i32* %a, i32* %b) #0 {
+define void @test_no_sve(ptr %a, ptr %b) #0 {
 entry:
   br label %loop
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
-  store i32 %add, i32* %arrayidx, align 4
+  store i32 %add, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !18
@@ -356,20 +356,20 @@ exit:
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test_no_max_vscale
 ; CHECK: <4 x i32>
-define void @test_no_max_vscale(i32* %a, i32* %b) #0 {
+define void @test_no_max_vscale(ptr %a, ptr %b) #0 {
 entry:
   br label %loop
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %2 = add nuw nsw i64 %iv, 4
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !21

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll
index c2581b92ceabf..bd3a01b124150 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll
@@ -16,17 +16,17 @@ target triple = "aarch64-unknown-linux-gnu"
 ; architecture with masked loads/stores, but we use SVE for testing purposes
 ; here.
 
-define void @foo(i32* %data1, i32* %data2) {
+define void @foo(ptr %data1, ptr %data2) {
 ; CHECK-LABEL: @foo(
 ; CHECK:       vector.body:
 ; CHECK:         br i1 {{%.*}}, label %pred.store.if, label %pred.store.continue
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    store i32 {{%.*}}, i32* {{%.*}}
+; CHECK-NEXT:    store i32 {{%.*}}, ptr {{%.*}}
 ; CHECK-NEXT:    br label %pred.store.continue
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    br i1 {{%.*}}, label %pred.store.if1, label %pred.store.continue2
 ; CHECK:       pred.store.if1:
-; CHECK-NEXT:    store i32 {{%.*}}, i32* {{%.*}}
+; CHECK-NEXT:    store i32 {{%.*}}, ptr {{%.*}}
 ; CHECK-NEXT:    br label %pred.store.continue2
 ; CHECK:       pred.store.continue2:
 
@@ -35,13 +35,13 @@ entry:
 
 while.body:
   %i = phi i64 [ 1023, %entry ], [ %i.next, %if.end ]
-  %arrayidx = getelementptr inbounds i32, i32* %data1, i64 %i
-  %ld = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %data1, i64 %i
+  %ld = load i32, ptr %arrayidx, align 4
   %cmp = icmp sgt i32 %ld, %ld
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:
-  store i32 %ld, i32* %arrayidx, align 4
+  store i32 %ld, ptr %arrayidx, align 4
   br label %if.end
 
 if.end:

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll
index a0ef4f4804ed8..38fedc1faac23 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll
@@ -7,7 +7,7 @@ target triple = "aarch64--linux-gnu"
 @Foo = common global %struct.anon zeroinitializer, align 4
 
 ; CHECK-LABEL: @foo(
-; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: load <4 x i32>, ptr
 ; CHECK: sdiv <4 x i32>
 ; CHECK: store <4 x i32>
 
@@ -17,11 +17,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i64 0, i32 2, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %div = sdiv i32 %0, 2
-  %arrayidx2 = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv
-  store i32 %div, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds %struct.anon, ptr @Foo, i64 0, i32 0, i64 %indvars.iv
+  store i32 %div, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 100
   br i1 %exitcond, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
index 604ac075da42e..1cde8b9bad6fc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-ios5.0.0"
 
-define void @selects_1(i32* nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) {
+define void @selects_1(ptr nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) {
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %cond = select i1 %cmp1, i32 10, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %cond6 = select i1 %cmp2, i32 30, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6
@@ -27,8 +27,8 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %dst, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %and = and i32 %0, 2047
   %cmp1 = icmp eq i32 %and, %A
   %cond = select i1 %cmp1, i32 10, i32 %and
@@ -36,7 +36,7 @@ for.body:                                         ; preds = %for.body.preheader,
   %cond6 = select i1 %cmp2, i32 30, i32 %and
   %cmp7 = icmp ugt i32 %cond, %C
   %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6
-  store i32 %cond11, i32* %arrayidx, align 4
+  store i32 %cond11, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
index 1052b7fc91034..87347ef1162a9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
@@ -7,23 +7,23 @@ target triple = "aarch64--linux-gnu"
 ; CHECK-LABEL: Checking a loop in 'interleaved_access'
 ; CHECK:         The Smallest and Widest types: 64 / 64 bits
 ;
-define void @interleaved_access(i8** %A, i64 %N) {
+define void @interleaved_access(ptr %A, i64 %N) {
 for.ph:
   br label %for.body
 
 for.body:
   %i = phi i64 [ %i.next.3, %for.body ], [ 0, %for.ph ]
-  %tmp0 = getelementptr inbounds i8*, i8** %A, i64 %i
-  store i8* null, i8** %tmp0, align 8
+  %tmp0 = getelementptr inbounds ptr, ptr %A, i64 %i
+  store ptr null, ptr %tmp0, align 8
   %i.next.0 = add nuw nsw i64 %i, 1
-  %tmp1 = getelementptr inbounds i8*, i8** %A, i64 %i.next.0
-  store i8* null, i8** %tmp1, align 8
+  %tmp1 = getelementptr inbounds ptr, ptr %A, i64 %i.next.0
+  store ptr null, ptr %tmp1, align 8
   %i.next.1 = add nsw i64 %i, 2
-  %tmp2 = getelementptr inbounds i8*, i8** %A, i64 %i.next.1
-  store i8* null, i8** %tmp2, align 8
+  %tmp2 = getelementptr inbounds ptr, ptr %A, i64 %i.next.1
+  store ptr null, ptr %tmp2, align 8
   %i.next.2 = add nsw i64 %i, 3
-  %tmp3 = getelementptr inbounds i8*, i8** %A, i64 %i.next.2
-  store i8* null, i8** %tmp3, align 8
+  %tmp3 = getelementptr inbounds ptr, ptr %A, i64 %i.next.2
+  store ptr null, ptr %tmp3, align 8
   %i.next.3 = add nsw i64 %i, 4
   %cond = icmp slt i64 %i.next.3, %N
   br i1 %cond, label %for.body, label %for.end

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll
index 16080e5a12f9a..312718991ea11 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll
@@ -9,15 +9,15 @@ target triple="aarch64-unknown-linux-gnu"
 ; CHECK-VF4: Found an estimated cost of 21 for VF 4 For instruction:   %add = fadd float %0, %sum.07
 ; CHECK-VF8: Found an estimated cost of 42 for VF 8 For instruction:   %add = fadd float %0, %sum.07
 
-define float @fadd_strict32(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict32(ptr noalias nocapture readonly %a, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
   %add = fadd float %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -31,15 +31,15 @@ for.end:
 ; CHECK-VF4: Found an estimated cost of 18 for VF 4 For instruction:   %add = fadd double %0, %sum.07
 ; CHECK-VF8: Found an estimated cost of 36 for VF 8 For instruction:   %add = fadd double %0, %sum.07
 
-define double @fadd_strict64(double* noalias nocapture readonly %a, i64 %n) {
+define double @fadd_strict64(ptr noalias nocapture readonly %a, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %a, i64 %iv
-  %0 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv
+  %0 = load double, ptr %arrayidx, align 4
   %add = fadd double %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -52,17 +52,17 @@ for.end:
 ; CHECK-VF4: Found an estimated cost of 23 for VF 4 For instruction:   %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07)
 ; CHECK-VF8: Found an estimated cost of 46 for VF 8 For instruction:   %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07)
 
-define float @fmuladd_strict32(float* %a, float* %b, i64 %n) {
+define float @fmuladd_strict32(ptr %a, ptr %b, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %b, i64 %iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv
+  %1 = load float, ptr %arrayidx2, align 4
   %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07)
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -77,17 +77,17 @@ declare float @llvm.fmuladd.f32(float, float, float)
 ; CHECK-VF4: Found an estimated cost of 22 for VF 4 For instruction:   %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07)
 ; CHECK-VF8: Found an estimated cost of 44 for VF 8 For instruction:   %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07)
 
-define double @fmuladd_strict64(double* %a, double* %b, i64 %n) {
+define double @fmuladd_strict64(ptr %a, ptr %b, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi double [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %a, i64 %iv
-  %0 = load double, double* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds double, double* %b, i64 %iv
-  %1 = load double, double* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv
+  %0 = load double, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds double, ptr %b, i64 %iv
+  %1 = load double, ptr %arrayidx2, align 4
   %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07)
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll
index 679c9dc9dedc6..a06c5a75510c4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll
@@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu"
 
 ; CHECK-DEBUG: LV: Not interleaving scalar ordered reductions.
 
-define void @foo(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %M, i64 %N) {
+define void @foo(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %M, i64 %N) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NOT: vector.body
 
@@ -15,7 +15,7 @@ entry:
 
 for.body.us:                                      ; preds = %entry, %for.cond3
   %i.023.us = phi i64 [ %inc8.us, %for.cond3 ], [ 0, %entry ]
-  %arrayidx.us = getelementptr inbounds float, float* %dst, i64 %i.023.us
+  %arrayidx.us = getelementptr inbounds float, ptr %dst, i64 %i.023.us
   %mul.us = mul nsw i64 %i.023.us, %N
   br label %for.body3.us
 
@@ -23,8 +23,8 @@ for.body3.us:                                     ; preds = %for.body.us, %for.b
   %0 = phi float [ 0.000000e+00, %for.body.us ], [ %add6.us, %for.body3.us ]
   %j.021.us = phi i64 [ 0, %for.body.us ], [ %inc.us, %for.body3.us ]
   %add.us = add nsw i64 %j.021.us, %mul.us
-  %arrayidx4.us = getelementptr inbounds float, float* %src, i64 %add.us
-  %1 = load float, float* %arrayidx4.us, align 4
+  %arrayidx4.us = getelementptr inbounds float, ptr %src, i64 %add.us
+  %1 = load float, ptr %arrayidx4.us, align 4
   %add6.us = fadd float %1, %0
   %inc.us = add nuw nsw i64 %j.021.us, 1
   %exitcond.not = icmp eq i64 %inc.us, %N
@@ -32,7 +32,7 @@ for.body3.us:                                     ; preds = %for.body.us, %for.b
 
 for.cond3:                                        ; preds = %for.body3.us
   %add6.us.lcssa = phi float [ %add6.us, %for.body3.us ]
-  store float %add6.us.lcssa, float* %arrayidx.us, align 4
+  store float %add6.us.lcssa, ptr %arrayidx.us, align 4
   %inc8.us = add nuw nsw i64 %i.023.us, 1
   %exitcond26.not = icmp eq i64 %inc8.us, %M
   br i1 %exitcond26.not, label %exit, label %for.body.us

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
index a9b90f58edf92..8ee0338c1468a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
@@ -5,26 +5,26 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"
 
-define void @cmpsel_i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) {
+define void @cmpsel_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @cmpsel_i32(
 ; CHECK-NEXT:  entry:
 ; CHECK:       vector.body:
-; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* {{.*}}, align 4
+; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr {{.*}}, align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 10, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK:         store <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32>* {{.*}}, align 4
+; CHECK:         store <vscale x 4 x i32> [[TMP2]], ptr {{.*}}, align 4
 ;
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %tobool.not = icmp eq i32 %0, 0
   %cond = select i1 %tobool.not, i32 2, i32 10
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 %cond, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %cond, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !0
@@ -36,26 +36,26 @@ for.end:                                          ; preds = %for.end.loopexit, %
   ret void
 }
 
-define void @cmpsel_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {
+define void @cmpsel_f32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @cmpsel_f32(
 ; CHECK-NEXT:  entry:
 ; CHECK:       vector.body:
-; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* {{.*}}, align 4
+; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr {{.*}}, align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+01, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK:         store <vscale x 4 x float> [[TMP2]], <vscale x 4 x float>* {{.*}}, align 4
+; CHECK:         store <vscale x 4 x float> [[TMP2]], ptr {{.*}}, align 4
 
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp1 = fcmp ogt float %0, 3.000000e+00
   %conv = select i1 %cmp1, float 1.000000e+01, float 2.000000e+00
-  %arrayidx3 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %conv, float* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %conv, ptr %arrayidx3, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -64,24 +64,24 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-define void @fneg_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {
+define void @fneg_f32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @fneg_f32(
 ; CHECK-NEXT:  entry:
 ; CHECK:       vector.body:
-; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* {{.*}}, align 4
+; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr {{.*}}, align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = fneg <vscale x 4 x float> [[WIDE_LOAD]]
-; CHECK:         store <vscale x 4 x float> [[TMP1]], <vscale x 4 x float>* {{.*}}, align 4
+; CHECK:         store <vscale x 4 x float> [[TMP1]], ptr {{.*}}, align 4
 
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %fneg = fneg float %0
-  %arrayidx3 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %fneg, float* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %fneg, ptr %arrayidx3, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
index 6d1bb88b8ac08..a5ebd005df7c5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -mattr=+sve \
 ; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue -S %s -o - | FileCheck %s
 
-define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 {
+define void @cond_inv_load_i32i32i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %cond, ptr noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @cond_inv_load_i32i32i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -14,20 +14,18 @@ define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias noc
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16*> poison, i16* [[INV:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16*> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16*> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[INV:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> [[BROADCAST_SPLAT]], i32 2, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i16> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 2, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i16> poison)
 ; CHECK-NEXT:    [[TMP7:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i32>
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[TMP7]], <vscale x 4 x i32>* [[TMP9]], i32 4, <vscale x 4 x i1> [[TMP6]])
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP7]], ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP6]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
@@ -41,15 +39,15 @@ define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias noc
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[COND]], i64 [[I_07]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[I_07]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP13]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[INV]], align 2
+; CHECK-NEXT:    [[TMP14:%.*]] = load i16, ptr [[INV]], align 2
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP14]] to i32
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_07]]
-; CHECK-NEXT:    store i32 [[CONV]], i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_07]]
+; CHECK-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_07]], 1
@@ -63,16 +61,16 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.inc
   %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %i.07
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %cond, i64 %i.07
+  %0 = load i32, ptr %arrayidx, align 4
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %for.inc, label %if.then
 
 if.then:                                          ; preds = %for.body
-  %1 = load i16, i16* %inv, align 2
+  %1 = load i16, ptr %inv, align 2
   %conv = sext i16 %1 to i32
-  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.07
-  store i32 %conv, i32* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.07
+  store i32 %conv, ptr %arrayidx1, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -84,7 +82,7 @@ exit:                        ; preds = %for.inc
   ret void
 }
 
-define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) #0 {
+define void @cond_inv_load_f64f64f64(ptr noalias nocapture %a, ptr noalias nocapture readonly %cond, ptr noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @cond_inv_load_f64f64f64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -96,19 +94,17 @@ define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noali
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x double*> poison, double* [[INV:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x double*> [[BROADCAST_SPLATINSERT]], <vscale x 4 x double*> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[INV:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, double* [[COND:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TMP4]] to <vscale x 4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x double>, <vscale x 4 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x double>, ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <vscale x 4 x double> [[WIDE_LOAD]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 4.000000e-01, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> [[BROADCAST_SPLAT]], i32 8, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x double> poison)
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast double* [[TMP7]] to <vscale x 4 x double>*
-; CHECK-NEXT:    call void @llvm.masked.store.nxv4f64.p0nxv4f64(<vscale x 4 x double> [[WIDE_MASKED_GATHER]], <vscale x 4 x double>* [[TMP8]], i32 8, <vscale x 4 x i1> [[TMP6]])
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x double> poison)
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> [[WIDE_MASKED_GATHER]], ptr [[TMP7]], i32 8, <vscale x 4 x i1> [[TMP6]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -122,14 +118,14 @@ define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noali
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[COND]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load double, double* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[COND]], i64 [[I_08]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt double [[TMP12]], 4.000000e-01
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP13:%.*]] = load double, double* [[INV]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[I_08]]
-; CHECK-NEXT:    store double [[TMP13]], double* [[ARRAYIDX2]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = load double, ptr [[INV]], align 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]]
+; CHECK-NEXT:    store double [[TMP13]], ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
@@ -143,15 +139,15 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.inc
   %i.08 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %cond, i64 %i.08
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %cond, i64 %i.08
+  %0 = load double, ptr %arrayidx, align 8
   %cmp1 = fcmp ogt double %0, 4.000000e-01
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %1 = load double, double* %inv, align 8
-  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %i.08
-  store double %1, double* %arrayidx2, align 8
+  %1 = load double, ptr %inv, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %i.08
+  store double %1, ptr %arrayidx2, align 8
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -163,7 +159,7 @@ exit:                        ; preds = %for.inc
   ret void
 }
 
-define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) #0 {
+define void @invariant_load_cond(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %cond, i64 %n) #0 {
 ; CHECK-LABEL: @invariant_load_cond(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -178,21 +174,18 @@ define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture reado
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 42
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32*> poison, i32* [[TMP4]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32*> [[DOTSPLATINSERT]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 42
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[TMP4]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[DOTSPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[B]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP9]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[DOTSPLAT]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> poison)
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[DOTSPLAT]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> poison)
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_LOAD]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[TMP7]])
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP10]], ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP7]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
@@ -206,18 +199,18 @@ define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture reado
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[COND]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP16]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 42
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
-; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 42
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP17]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]]
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
@@ -231,19 +224,19 @@ entry:
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
-  %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 42
-  %arrayidx2 = getelementptr inbounds i32, i32* %cond, i64 %iv
-  %0 = load i32, i32* %arrayidx2, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 42
+  %arrayidx2 = getelementptr inbounds i32, ptr %cond, i64 %iv
+  %0 = load i32, ptr %arrayidx2, align 4
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %for.inc, label %if.then
 
 if.then:
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx3, align 4
-  %2 = load i32, i32* %arrayidx1, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx3, align 4
+  %2 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %2, %1
-  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %iv
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %add, ptr %arrayidx4, align 4
   br label %for.inc
 
 for.inc:

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll
index 64a0d7d3f4404..3fc1a15cbd9b2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll
@@ -5,19 +5,19 @@
 target triple="aarch64--linux-gnu"
 
 ; CHECK: LV: Checking a loop in 'gather_nxv4i32_loaded_index'
-; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   %1 = load float, float* %arrayidx3, align 4
-define void @gather_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
+; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   %1 = load float, ptr %arrayidx3, align 4
+define void @gather_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %b, i64 %indvars.iv
-  %0 = load i64, i64* %arrayidx, align 8
-  %arrayidx3 = getelementptr inbounds float, float* %a, i64 %0
-  %1 = load float, float* %arrayidx3, align 4
-  %arrayidx5 = getelementptr inbounds float, float* %c, i64 %indvars.iv
-  store float %1, float* %arrayidx5, align 4
+  %arrayidx = getelementptr inbounds i64, ptr %b, i64 %indvars.iv
+  %0 = load i64, ptr %arrayidx, align 8
+  %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %0
+  %1 = load float, ptr %arrayidx3, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %c, i64 %indvars.iv
+  store float %1, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
@@ -27,19 +27,19 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 }
 
 ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_loaded_index'
-; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   store float %1, float* %arrayidx5, align 4
-define void @scatter_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
+; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   store float %1, ptr %arrayidx5, align 4
+define void @scatter_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %b, i64 %indvars.iv
-  %0 = load i64, i64* %arrayidx, align 8
-  %arrayidx3 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %1 = load float, float* %arrayidx3, align 4
-  %arrayidx5 = getelementptr inbounds float, float* %c, i64 %0
-  store float %1, float* %arrayidx5, align 4
+  %arrayidx = getelementptr inbounds i64, ptr %b, i64 %indvars.iv
+  %0 = load i64, ptr %arrayidx, align 8
+  %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx3, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %c, i64 %0
+  store float %1, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
@@ -51,18 +51,18 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks
 ; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and a cost of 1.
 ; CHECK: LV: Checking a loop in 'gather_nxv4i32_unknown_stride'
-; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction:   %0 = load float, float* %arrayidx, align 4
-define void @gather_nxv4i32_unknown_stride(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %stride, i64 %n) #0 {
+; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction:   %0 = load float, ptr %arrayidx, align 4
+define void @gather_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %indvars.iv.stride2 = mul i64 %indvars.iv, %stride
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %0, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %0, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
@@ -74,18 +74,18 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks
 ; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and cost is 1.
 ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_unknown_stride'
-; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction:   store float %0, float* %arrayidx2, align 4
-define void @scatter_nxv4i32_unknown_stride(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %stride, i64 %n) #0 {
+; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction:   store float %0, ptr %arrayidx2, align 4
+define void @scatter_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %indvars.iv.stride2 = mul i64 %indvars.iv, %stride
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv.stride2
-  store float %0, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2
+  store float %0, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
@@ -95,18 +95,18 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 }
 
 ; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride2'
-; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   %0 = load float, float* %arrayidx, align 4
-define void @gather_nxv4i32_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
+; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   %0 = load float, ptr %arrayidx, align 4
+define void @gather_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %indvars.iv.stride2 = mul i64 %indvars.iv, 2
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %0, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %0, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
@@ -116,18 +116,18 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 }
 
 ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride2'
-; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   store float %0, float* %arrayidx2, align 4
-define void @scatter_nxv4i32_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
+; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   store float %0, ptr %arrayidx2, align 4
+define void @scatter_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %indvars.iv.stride2 = mul i64 %indvars.iv, 2
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv.stride2
-  store float %0, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2
+  store float %0, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
@@ -138,18 +138,18 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 
 
 ; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride64'
-; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   %0 = load float, float* %arrayidx, align 4
-define void @gather_nxv4i32_stride64(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
+; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   %0 = load float, ptr %arrayidx, align 4
+define void @gather_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %indvars.iv.stride2 = mul i64 %indvars.iv, 64
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %0, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %0, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
@@ -159,18 +159,18 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 }
 
 ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride64'
-; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   store float %0, float* %arrayidx2, align 4
-define void @scatter_nxv4i32_stride64(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
+; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction:   store float %0, ptr %arrayidx2, align 4
+define void @scatter_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %indvars.iv.stride2 = mul i64 %indvars.iv, 64
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv.stride2
-  store float %0, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2
+  store float %0, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
index 14db7f27c6092..7a630feada12a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -mattr=+sve \
 ; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue -S %s -force-target-instruction-cost=1 -o - | FileCheck %s
 
-define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
+define void @gather_nxv4i32_ind64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 {
 ; CHECK-LABEL: @gather_nxv4i32_ind64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -17,14 +17,12 @@ define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noa
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <vscale x 4 x i64>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i64>, <vscale x 4 x i64>* [[TMP5]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], <vscale x 4 x i64> [[WIDE_LOAD]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> [[TMP6]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float* [[TMP7]] to <vscale x 4 x float>*
-; CHECK-NEXT:    store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], <vscale x 4 x float>* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i64>, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], <vscale x 4 x i64> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -38,12 +36,12 @@ define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noa
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[ARRAYIDX3]], align 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store float [[TMP13]], float* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[TMP13]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -55,12 +53,12 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %b, i64 %indvars.iv
-  %0 = load i64, i64* %arrayidx, align 8
-  %arrayidx3 = getelementptr inbounds float, float* %a, i64 %0
-  %1 = load float, float* %arrayidx3, align 4
-  %arrayidx5 = getelementptr inbounds float, float* %c, i64 %indvars.iv
-  store float %1, float* %arrayidx5, align 4
+  %arrayidx = getelementptr inbounds i64, ptr %b, i64 %indvars.iv
+  %0 = load i64, ptr %arrayidx, align 8
+  %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %0
+  %1 = load float, ptr %arrayidx3, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %c, i64 %indvars.iv
+  store float %1, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
@@ -72,7 +70,7 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 ; NOTE: I deliberately chose '%b' as an array of i32 indices, since the
 ; additional 'sext' in the for.body loop exposes additional code paths
 ; during vectorisation.
-define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) #0 {
+define void @scatter_nxv4i32_ind32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i64 %n) #0 {
 ; CHECK-LABEL: @scatter_nxv4i32_ind32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -87,15 +85,13 @@ define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias noc
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <vscale x 4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* [[TMP5]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD1]] to <vscale x 4 x i64>
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A:%.*]], <vscale x 4 x i64> [[TMP8]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float*> [[TMP9]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], <vscale x 4 x i64> [[TMP8]]
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
@@ -109,13 +105,13 @@ define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias noc
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
 ; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[TMP14]] to i64
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IDXPROM4]]
-; CHECK-NEXT:    store float [[TMP13]], float* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    store float [[TMP13]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -127,13 +123,13 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %c, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx3, align 4
+  %arrayidx = getelementptr inbounds float, ptr %c, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx3, align 4
   %idxprom4 = sext i32 %1 to i64
-  %arrayidx5 = getelementptr inbounds float, float* %a, i64 %idxprom4
-  store float %0, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %idxprom4
+  store float %0, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
@@ -142,7 +138,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
   ret void
 }
 
-define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) #0 {
+define void @scatter_inv_nxv4i32(ptr noalias nocapture %inv, ptr noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-LABEL: @scatter_inv_nxv4i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -154,16 +150,15 @@ define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocap
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32*> poison, i32* [[INV:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32*> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[INV:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32*> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP6]])
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP6]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
@@ -177,12 +172,12 @@ define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocap
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP10]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store i32 3, i32* [[INV]], align 4
+; CHECK-NEXT:    store i32 3, ptr [[INV]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -196,13 +191,13 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %for.inc, label %if.then
 
 if.then:                                          ; preds = %for.body
-  store i32 3, i32* %inv, align 4
+  store i32 3, ptr %inv, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -214,7 +209,7 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
   ret void
 }
 
-define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) #0 {
+define void @gather_inv_nxv4i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @gather_inv_nxv4i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -226,18 +221,16 @@ define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocaptur
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32*> poison, i32* [[INV:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32*> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[INV:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i32> poison)
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP4]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x i32>* [[TMP7]], i32 4, <vscale x 4 x i1> [[TMP6]])
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i32> poison)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP4]], i32 4, <vscale x 4 x i1> [[TMP6]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
@@ -251,13 +244,13 @@ define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocaptur
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[TMP11]], 3
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[INV]], align 4
-; CHECK-NEXT:    store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[INV]], align 4
+; CHECK-NEXT:    store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -271,14 +264,14 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp2 = icmp sgt i32 %0, 3
   br i1 %cmp2, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %1 = load i32, i32* %inv, align 4
-  store i32 %1, i32* %arrayidx, align 4
+  %1 = load i32, ptr %inv, align 4
+  store i32 %1, ptr %arrayidx, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -292,7 +285,7 @@ for.cond.cleanup:                                 ; preds = %for.inc, %entry
 
 
 
-define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) #0 {
+define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-LABEL: @gather_nxv4i32_ind64_stride2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -316,19 +309,17 @@ define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture %a, float* no
 ; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = shl <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP8:%.*]] = shl <vscale x 4 x i64> [[STEP_ADD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[B:%.*]], <vscale x 4 x i64> [[TMP7]]
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[B]], <vscale x 4 x i64> [[TMP8]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> [[TMP9]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast float* [[TMP11]] to <vscale x 4 x float>*
-; CHECK-NEXT:    store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], <vscale x 4 x float>* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], <vscale x 4 x i64> [[TMP7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], <vscale x 4 x i64> [[TMP8]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 2
 ; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = bitcast float* [[TMP16]] to <vscale x 4 x float>*
-; CHECK-NEXT:    store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], <vscale x 4 x float>* [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP15]]
+; CHECK-NEXT:    store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
@@ -344,10 +335,10 @@ define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture %a, float* no
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[INDVARS_IV_STRIDE2:%.*]] = shl i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV_STRIDE2]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store float [[TMP21]], float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV_STRIDE2]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[TMP21]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -360,10 +351,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %indvars.iv.stride2 = mul i64 %indvars.iv, 2
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %0, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %0, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
index 84b1391b54ef4..739010b5b631f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
@@ -11,7 +11,7 @@ target triple = "aarch64-linux-gnu"
 ;       a[i] = b[i];
 ;   }
 
-define void @cond_ind64(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 {
+define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-LABEL: @cond_ind64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -33,12 +33,10 @@ define void @cond_ind64(i32* noalias nocapture %a, i32* noalias nocapture readon
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = trunc <vscale x 4 x i64> [[VEC_IND]] to <vscale x 4 x i1>
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP9]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> poison)
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32>* [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP7]])
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> poison)
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[TMP7]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP13:%.*]] = shl i64 [[TMP12]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
@@ -57,10 +55,10 @@ define void @cond_ind64(i32* noalias nocapture %a, i32* noalias nocapture readon
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i64 [[AND]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_08]]
-; CHECK-NEXT:    store i32 [[TMP15]], i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_08]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_08]]
+; CHECK-NEXT:    store i32 [[TMP15]], ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
@@ -79,10 +77,10 @@ for.body:                                         ; preds = %entry, %for.inc
   br i1 %tobool.not, label %for.inc, label %if.then
 
 if.then:                                          ; preds = %for.body
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.08
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.08
-  store i32 %0, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.08
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.08
+  store i32 %0, ptr %arrayidx1, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
index 90a7b10fa6493..05a6ebe2d2221 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
@@ -1,28 +1,28 @@
 ; RUN: opt -S -passes=loop-vectorize -mattr=+sve -mtriple aarch64-linux-gnu \
 ; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s
 
-define void @invariant_load(i64 %n, i32* noalias nocapture %a, i32* nocapture readonly %b) {
+define void @invariant_load(i64 %n, ptr noalias nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-LABEL: @invariant_load
 ; CHECK: vector.body:
-; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
-; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, i32* %[[GEP]]
+; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, ptr %b, i64 42
+; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, ptr %[[GEP]]
 ; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[INVLOAD]], i32 0
 ; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[SPLATINS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK: %[[LOAD:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>*
+; CHECK: %[[LOAD:.*]] = load <vscale x 4 x i32>, ptr
 ; CHECK-NEXT: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[SPLAT]], %[[LOAD]]
-; CHECK: store <vscale x 4 x i32> %[[ADD]], <vscale x 4 x i32>*
+; CHECK: store <vscale x 4 x i32> %[[ADD]], ptr
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 42
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 42
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %0, %1
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %iv
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %add, ptr %arrayidx2, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
index 51fc8f909d529..31abba4113373 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
@@ -1,25 +1,25 @@
 ; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -passes=loop-vectorize,dce,instcombine -S \
 ; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue <%s | FileCheck %s
 
-define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 {
+define void @stride7_i32(ptr noalias nocapture %dst, i64 %n) #0 {
 ; CHECK-LABEL: @stride7_i32(
 ; CHECK:      vector.body
 ; CHECK:        %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
 ; CHECK-NEXT:   %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 7, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:   %[[PTRS:.*]] = getelementptr inbounds i32, i32* %dst, <vscale x 4 x i64> %[[PTR_INDICES]]
-; CHECK-NEXT:   %[[GLOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[PTRS]]
+; CHECK-NEXT:   %[[PTRS:.*]] = getelementptr inbounds i32, ptr %dst, <vscale x 4 x i64> %[[PTR_INDICES]]
+; CHECK-NEXT:   %[[GLOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %[[PTRS]]
 ; CHECK-NEXT:   %[[VALS:.*]] = add nsw <vscale x 4 x i32> %[[GLOAD]],
-; CHECK-NEXT:   call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %[[VALS]], <vscale x 4 x i32*> %[[PTRS]]
+; CHECK-NEXT:   call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %[[VALS]], <vscale x 4 x ptr> %[[PTRS]]
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %mul = mul nuw nsw i64 %i.05, 7
-  %arrayidx = getelementptr inbounds i32, i32* %dst, i64 %mul
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %dst, i64 %mul
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add nsw i32 %0, 3
-  store i32 %add, i32* %arrayidx, align 4
+  store i32 %add, ptr %arrayidx, align 4
   %inc = add nuw nsw i64 %i.05, 1
   %exitcond.not = icmp eq i64 %inc, %n
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -28,25 +28,25 @@ for.end:                                          ; preds = %for.end.loopexit, %
   ret void
 }
 
-define void @stride7_f64(double* noalias nocapture %dst, i64 %n) #0 {
+define void @stride7_f64(ptr noalias nocapture %dst, i64 %n) #0 {
 ; CHECK-LABEL: @stride7_f64(
 ; CHECK:      vector.body
 ; CHECK:        %[[VEC_IND:.*]] = phi <vscale x 2 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
 ; CHECK-NEXT:   %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 2 x i64> %[[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 7, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:   %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, <vscale x 2 x i64> %[[PTR_INDICES]]
-; CHECK-NEXT:   %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %[[PTRS]],
+; CHECK-NEXT:   %[[PTRS:.*]] = getelementptr inbounds double, ptr %dst, <vscale x 2 x i64> %[[PTR_INDICES]]
+; CHECK-NEXT:   %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %[[PTRS]],
 ; CHECK-NEXT:   %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]],
-; CHECK-NEXT:  call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %[[VALS]], <vscale x 2 x double*> %[[PTRS]],
+; CHECK-NEXT:  call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %[[VALS]], <vscale x 2 x ptr> %[[PTRS]],
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %mul = mul nuw nsw i64 %i.05, 7
-  %arrayidx = getelementptr inbounds double, double* %dst, i64 %mul
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %dst, i64 %mul
+  %0 = load double, ptr %arrayidx, align 8
   %add = fadd double %0, 1.000000e+00
-  store double %add, double* %arrayidx, align 8
+  store double %add, ptr %arrayidx, align 8
   %inc = add nuw nsw i64 %i.05, 1
   %exitcond.not = icmp eq i64 %inc, %n
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6
@@ -56,30 +56,30 @@ for.end:                                          ; preds = %for.end.loopexit, %
 }
 
 
-define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) #0 {
+define void @cond_stride7_f64(ptr noalias nocapture %dst, ptr noalias nocapture readonly %cond, i64 %n) #0 {
 ; CHECK-LABEL: @cond_stride7_f64(
 ; CHECK:      vector.body
 ; CHECK:        %[[MASK:.*]] = icmp ne <vscale x 2 x i64>
-; CHECK:        %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, <vscale x 2 x i64> %{{.*}}
-; CHECK-NEXT:   %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]]
+; CHECK:        %[[PTRS:.*]] = getelementptr inbounds double, ptr %dst, <vscale x 2 x i64> %{{.*}}
+; CHECK-NEXT:   %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]]
 ; CHECK-NEXT:   %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]],
-; CHECK-NEXT:  call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %[[VALS]], <vscale x 2 x double*> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]])
+; CHECK-NEXT:  call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %[[VALS]], <vscale x 2 x ptr> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]])
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.inc
   %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %cond, i64 %i.07
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %cond, i64 %i.07
+  %0 = load i64, ptr %arrayidx, align 8
   %tobool.not = icmp eq i64 %0, 0
   br i1 %tobool.not, label %for.inc, label %if.then
 
 if.then:                                          ; preds = %for.body
   %mul = mul nsw i64 %i.07, 7
-  %arrayidx1 = getelementptr inbounds double, double* %dst, i64 %mul
-  %1 = load double, double* %arrayidx1, align 8
+  %arrayidx1 = getelementptr inbounds double, ptr %dst, i64 %mul
+  %1 = load double, ptr %arrayidx1, align 8
   %add = fadd double %1, 1.000000e+00
-  store double %add, double* %arrayidx1, align 8
+  store double %add, ptr %arrayidx1, align 8
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
index eac97015b0a83..5a87b3b76b0d4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
@@ -2,14 +2,14 @@
 
 target triple = "aarch64-unknown-linux-gnu"
 
-define void @trip7_i64(i64* noalias nocapture noundef %dst, i64* noalias nocapture noundef readonly %src) #0 {
+define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
 ; CHECK-LABEL: @trip7_i64(
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
 ; CHECK:         [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ {{%.*}}, %vector.ph ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %vector.body ]
-; CHECK:         {{%.*}} = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
-; CHECK:         {{%.*}} = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
-; CHECK:         call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> {{%.*}}, <vscale x 2 x i64>* {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK:         {{%.*}} = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
+; CHECK:         {{%.*}} = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
+; CHECK:         call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> {{%.*}}, ptr {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; CHECK:         [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[VF:%.*]] = mul i64 [[VSCALE]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[VF]]
@@ -23,13 +23,13 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.06
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.06
+  %0 = load i64, ptr %arrayidx, align 8
   %mul = shl nsw i64 %0, 1
-  %arrayidx1 = getelementptr inbounds i64, i64* %dst, i64 %i.06
-  %1 = load i64, i64* %arrayidx1, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %dst, i64 %i.06
+  %1 = load i64, ptr %arrayidx1, align 8
   %add = add nsw i64 %1, %mul
-  store i64 %add, i64* %arrayidx1, align 8
+  store i64 %add, ptr %arrayidx1, align 8
   %inc = add nuw nsw i64 %i.06, 1
   %exitcond.not = icmp eq i64 %inc, 7
   br i1 %exitcond.not, label %for.end, label %for.body
@@ -38,19 +38,19 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-define void @trip5_i8(i8* noalias nocapture noundef %dst, i8* noalias nocapture noundef readonly %src) #0 {
+define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
 ; CHECK-LABEL: @trip5_i8(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[I_08]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[MUL:%.*]] = shl i8 [[TMP0]], 1
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DST:%.*]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[MUL]], [[TMP1]]
-; CHECK-NEXT:    store i8 [[ADD]], i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
@@ -62,13 +62,13 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %mul = shl i8 %0, 1
-  %arrayidx1 = getelementptr inbounds i8, i8* %dst, i64 %i.08
-  %1 = load i8, i8* %arrayidx1, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
+  %1 = load i8, ptr %arrayidx1, align 1
   %add = add i8 %mul, %1
-  store i8 %add, i8* %arrayidx1, align 1
+  store i8 %add, ptr %arrayidx1, align 1
   %inc = add nuw nsw i64 %i.08, 1
   %exitcond.not = icmp eq i64 %inc, 5
   br i1 %exitcond.not, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll
index bd1e2890e13b0..820fd8840049e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll
@@ -1,32 +1,30 @@
 ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -mattr=+sve \
 ; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue -S %s -o - | FileCheck %s
 
-define void @mloadstore_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {
+define void @mloadstore_f32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @mloadstore_f32
 ; CHECK: vector.body:
-; CHECK:       %[[LOAD1:.*]] = load <vscale x 4 x float>, <vscale x 4 x float>*
+; CHECK:       %[[LOAD1:.*]] = load <vscale x 4 x float>, ptr
 ; CHECK-NEXT:  %[[MASK:.*]] = fcmp ogt <vscale x 4 x float> %[[LOAD1]],
-; CHECK-NEXT:  %[[GEPA:.*]] = getelementptr float, float* %a,
-; CHECK-NEXT:  %[[MLOAD_PTRS:.*]] = bitcast float* %[[GEPA]] to <vscale x 4 x float>*
-; CHECK-NEXT:  %[[LOAD2:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %[[MLOAD_PTRS]], i32 4, <vscale x 4 x i1> %[[MASK]]
+; CHECK-NEXT:  %[[GEPA:.*]] = getelementptr float, ptr %a,
+; CHECK-NEXT:  %[[LOAD2:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %[[GEPA]], i32 4, <vscale x 4 x i1> %[[MASK]]
 ; CHECK-NEXT:  %[[FADD:.*]] = fadd <vscale x 4 x float> %[[LOAD1]], %[[LOAD2]]
-; CHECK-NEXT:  %[[MSTORE_PTRS:.*]] = bitcast float* %[[GEPA]] to <vscale x 4 x float>*
-; CHECK-NEXT:  call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> %[[FADD]], <vscale x 4 x float>* %[[MSTORE_PTRS]], i32 4, <vscale x 4 x i1> %[[MASK]])
+; CHECK-NEXT:  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %[[FADD]], ptr %[[GEPA]], i32 4, <vscale x 4 x i1> %[[MASK]])
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.inc
   %i.011 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %i.011
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.011
+  %0 = load float, ptr %arrayidx, align 4
   %cmp1 = fcmp ogt float %0, 0.000000e+00
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.011
-  %1 = load float, float* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %i.011
+  %1 = load float, ptr %arrayidx3, align 4
   %add = fadd float %0, %1
-  store float %add, float* %arrayidx3, align 4
+  store float %add, ptr %arrayidx3, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -38,32 +36,30 @@ exit:                                 ; preds = %for.inc
   ret void
 }
 
-define void @mloadstore_i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) {
+define void @mloadstore_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @mloadstore_i32
 ; CHECK: vector.body:
-; CHECK:       %[[LOAD1:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>*
+; CHECK:       %[[LOAD1:.*]] = load <vscale x 4 x i32>, ptr
 ; CHECK-NEXT:  %[[MASK:.*]] = icmp ne <vscale x 4 x i32> %[[LOAD1]],
-; CHECK-NEXT:  %[[GEPA:.*]] = getelementptr i32, i32* %a,
-; CHECK-NEXT:  %[[MLOAD_PTRS:.*]] = bitcast i32* %[[GEPA]] to <vscale x 4 x i32>*
-; CHECK-NEXT:  %[[LOAD2:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[MLOAD_PTRS]], i32 4, <vscale x 4 x i1> %[[MASK]]
+; CHECK-NEXT:  %[[GEPA:.*]] = getelementptr i32, ptr %a,
+; CHECK-NEXT:  %[[LOAD2:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr %[[GEPA]], i32 4, <vscale x 4 x i1> %[[MASK]]
 ; CHECK-NEXT:  %[[FADD:.*]] = add <vscale x 4 x i32> %[[LOAD1]], %[[LOAD2]]
-; CHECK-NEXT:  %[[MSTORE_PTRS:.*]] = bitcast i32* %[[GEPA]] to <vscale x 4 x i32>*
-; CHECK-NEXT:  call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %[[FADD]], <vscale x 4 x i32>* %[[MSTORE_PTRS]], i32 4, <vscale x 4 x i1> %[[MASK]])
+; CHECK-NEXT:  call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[FADD]], ptr %[[GEPA]], i32 4, <vscale x 4 x i1> %[[MASK]])
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.inc
   %i.011 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.011
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.011
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp ne i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %i.011
-  %1 = load i32, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %i.011
+  %1 = load i32, ptr %arrayidx3, align 4
   %add = add i32 %0, %1
-  store i32 %add, i32* %arrayidx3, align 4
+  store i32 %add, ptr %arrayidx3, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll
index 4296b466b3a0a..4304105d42751 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll
@@ -20,7 +20,7 @@
 
 ; CHECK-LABEL: @scalable_load_in_loop
 ; CHECK-NOT: vector.body
-define void @scalable_load_in_loop(i64 %n, <vscale x 4 x i32>* %x, <vscale x 4 x i32>* %y) {
+define void @scalable_load_in_loop(i64 %n, ptr %x, ptr %y) {
 entry:
   br label %for.body
 
@@ -31,8 +31,8 @@ for.body:
   br i1 %cmp, label %for.inc, label %if.end
 
 if.end:
-  %0 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %y
-  store <vscale x 4 x i32> %0, <vscale x 4 x i32>* %x
+  %0 = load <vscale x 4 x i32>, ptr %y
+  store <vscale x 4 x i32> %0, ptr %x
   br label %for.inc
 
 for.inc:

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
index f960164d060ee..cf5fdc4e30e83 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
@@ -5,7 +5,7 @@
 
 target triple = "aarch64-linux-gnu"
 
-define i32 @select_const_i32_from_icmp(i32* nocapture readonly %v, i64 %n) #0 {
+define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
 ; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp
 ; CHECK-VF4IC1:      vector.body:
 ; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
@@ -47,8 +47,8 @@ entry:
 for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
   %1 = phi i32 [ 3, %entry ], [ %5, %for.body ]
-  %2 = getelementptr inbounds i32, i32* %v, i64 %0
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %v, i64 %0
+  %3 = load i32, ptr %2, align 4
   %4 = icmp eq i32 %3, 3
   %5 = select i1 %4, i32 %1, i32 7
   %6 = add nuw nsw i64 %0, 1
@@ -59,7 +59,7 @@ exit:                                     ; preds = %for.body
   ret i32 %5
 }
 
-define i32 @select_i32_from_icmp(i32* nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 {
+define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 {
 ; CHECK-VF4IC1-LABEL: @select_i32_from_icmp
 ; CHECK-VF4IC1:      vector.ph:
 ; CHECK-VF4IC1:        [[TMP1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %a, i32 0
@@ -86,8 +86,8 @@ entry:
 for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
   %1 = phi i32 [ %a, %entry ], [ %5, %for.body ]
-  %2 = getelementptr inbounds i32, i32* %v, i64 %0
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %v, i64 %0
+  %3 = load i32, ptr %2, align 4
   %4 = icmp eq i32 %3, 3
   %5 = select i1 %4, i32 %1, i32 %b
   %6 = add nuw nsw i64 %0, 1
@@ -98,7 +98,7 @@ exit:                                     ; preds = %for.body
   ret i32 %5
 }
 
-define i32 @select_const_i32_from_fcmp(float* nocapture readonly %v, i64 %n) #0 {
+define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
 ; CHECK-VF4IC1-LABEL: @select_const_i32_from_fcmp
 ; CHECK-VF4IC1:      vector.body:
 ; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
@@ -118,8 +118,8 @@ entry:
 for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
   %1 = phi i32 [ 2, %entry ], [ %5, %for.body ]
-  %2 = getelementptr inbounds float, float* %v, i64 %0
-  %3 = load float, float* %2, align 4
+  %2 = getelementptr inbounds float, ptr %v, i64 %0
+  %3 = load float, ptr %2, align 4
   %4 = fcmp fast ueq float %3, 3.0
   %5 = select i1 %4, i32 %1, i32 1
   %6 = add nuw nsw i64 %0, 1
@@ -130,7 +130,7 @@ exit:                                     ; preds = %for.body
   ret i32 %5
 }
 
-define float @select_const_f32_from_icmp(i32* nocapture readonly %v, i64 %n) #0 {
+define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
 ; CHECK-VF4IC1-LABEL: @select_const_f32_from_icmp
 ; CHECK-VF4IC1-NOT: vector.body
 ; CHECK-VF4IC4-LABEL: @select_const_f32_from_icmp
@@ -141,8 +141,8 @@ entry:
 for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
   %1 = phi fast float [ 3.0, %entry ], [ %5, %for.body ]
-  %2 = getelementptr inbounds i32, i32* %v, i64 %0
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %v, i64 %0
+  %3 = load i32, ptr %2, align 4
   %4 = icmp eq i32 %3, 3
   %5 = select fast i1 %4, float %1, float 7.0
   %6 = add nuw nsw i64 %0, 1
@@ -153,13 +153,13 @@ exit:                                     ; preds = %for.body
   ret float %5
 }
 
-define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src1, i32* noalias nocapture readonly %src2, i64 %n) #0 {
+define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 {
 ; CHECK-VF4IC1-LABEL: @pred_select_const_i32_from_icmp
 ; CHECK-VF4IC1:      vector.body:
 ; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
 ; CHECK-VF4IC1:        [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
 ; CHECK-VF4IC1:        [[MASK:%.*]] = icmp sgt <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1:        [[MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* {{%.*}}, i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> poison)
+; CHECK-VF4IC1:        [[MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr {{%.*}}, i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> poison)
 ; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC1-NEXT:   [[VEC_SEL_TMP:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]]
 ; CHECK-VF4IC1:        [[VEC_SEL:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> [[VEC_SEL_TMP]], <vscale x 4 x i32> [[VEC_PHI]]
@@ -176,14 +176,14 @@ entry:
 for.body:                                         ; preds = %entry, %for.inc
   %i.013 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
   %r.012 = phi i32 [ %r.1, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %src1, i64 %i.013
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %src1, i64 %i.013
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 35
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %arrayidx2 = getelementptr inbounds i32, i32* %src2, i64 %i.013
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %src2, i64 %i.013
+  %1 = load i32, ptr %arrayidx2, align 4
   %cmp3 = icmp eq i32 %1, 2
   %spec.select = select i1 %cmp3, i32 1, i32 %r.012
   br label %for.inc

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
index 1ce996c7ea231..36dc30d76ed1f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
@@ -12,15 +12,15 @@ target triple="aarch64-unknown-linux-gnu"
 ; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 4 for VF vscale x 2 For instruction:   %add = fadd float %0, %sum.07
 ; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 8 for VF vscale x 4 For instruction:   %add = fadd float %0, %sum.07
 
-define float @fadd_strict32(float* noalias nocapture readonly %a, i64 %n) #0 {
+define float @fadd_strict32(ptr noalias nocapture readonly %a, i64 %n) #0 {
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
   %add = fadd float %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -34,15 +34,15 @@ for.end:
 ; CHECK: Found an estimated cost of 8 for VF vscale x 2 For instruction:   %add = fadd double %0, %sum.07
 ; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 4 for VF vscale x 2 For instruction:   %add = fadd double %0, %sum.07
 
-define double @fadd_strict64(double* noalias nocapture readonly %a, i64 %n) #0 {
+define double @fadd_strict64(ptr noalias nocapture readonly %a, i64 %n) #0 {
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %a, i64 %iv
-  %0 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv
+  %0 = load double, ptr %arrayidx, align 4
   %add = fadd double %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll
index aa8ed2994da44..bd3f222cc98d9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll
@@ -3,7 +3,7 @@
 target triple = "aarch64-unknown-linux-gnu"
 
 
-define void @f16_to_f32(float* noalias nocapture %dst, half* noalias nocapture readonly %src, i64 %N) #0 {
+define void @f16_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @f16_to_f32(
 ; CHECK: vector.body
 ; CHECK:   %{{.*}} = fpext <vscale x 8 x half> %{{.*}} to <vscale x 8 x float>
@@ -12,11 +12,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds half, half* %src, i64 %i.07
-  %0 = load half, half* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds half, ptr %src, i64 %i.07
+  %0 = load half, ptr %arrayidx, align 2
   %conv = fpext half %0 to float
-  %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
-  store float %conv, float* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07
+  store float %conv, ptr %arrayidx1, align 4
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -26,7 +26,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 
-define void @f64_to_f32(float* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %N) #0 {
+define void @f64_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @f64_to_f32(
 ; CHECK: vector.body
 ; CHECK:   %{{.*}} = fptrunc <vscale x 8 x double> %{{.*}} to <vscale x 8 x float>
@@ -35,11 +35,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %src, i64 %i.07
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %src, i64 %i.07
+  %0 = load double, ptr %arrayidx, align 8
   %conv = fptrunc double %0 to float
-  %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
-  store float %conv, float* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07
+  store float %conv, ptr %arrayidx1, align 4
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -49,7 +49,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 
-define void @f16_to_s8(i8* noalias nocapture %dst, half* noalias nocapture readonly %src, i64 %N) #0 {
+define void @f16_to_s8(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @f16_to_s8(
 ; CHECK: vector.body
 ; CHECK:   %{{.*}} = fptosi <vscale x 8 x half> %{{.*}} to <vscale x 8 x i8>
@@ -58,11 +58,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds half, half* %src, i64 %i.08
-  %0 = load half, half* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds half, ptr %src, i64 %i.08
+  %0 = load half, ptr %arrayidx, align 2
   %conv1 = fptosi half %0 to i8
-  %arrayidx2 = getelementptr inbounds i8, i8* %dst, i64 %i.08
-  store i8 %conv1, i8* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %dst, i64 %i.08
+  store i8 %conv1, ptr %arrayidx2, align 1
   %inc = add nuw nsw i64 %i.08, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -72,7 +72,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 
-define void @f32_to_u64(i64* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %N) #0 {
+define void @f32_to_u64(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @f32_to_u64(
 ; CHECK: vector.body
 ; CHECK:   %{{.*}} = fptoui <vscale x 8 x float> %{{.*}} to <vscale x 8 x i64>
@@ -81,11 +81,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %src, i64 %i.07
+  %0 = load float, ptr %arrayidx, align 4
   %conv = fptoui float %0 to i64
-  %arrayidx1 = getelementptr inbounds i64, i64* %dst, i64 %i.07
-  store i64 %conv, i64* %arrayidx1, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %dst, i64 %i.07
+  store i64 %conv, ptr %arrayidx1, align 8
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -95,7 +95,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 
-define void @s8_to_f32(float* noalias nocapture %dst, i8* noalias nocapture readonly %src, i64 %N) #0 {
+define void @s8_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @s8_to_f32(
 ; CHECK: vector.body
 ; CHECK:   %{{.*}} = sitofp <vscale x 8 x i8> %{{.*}} to <vscale x 8 x float>
@@ -104,11 +104,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.07
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.07
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = sitofp i8 %0 to float
-  %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
-  store float %conv, float* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07
+  store float %conv, ptr %arrayidx1, align 4
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -118,7 +118,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 
-define void @u16_to_f32(float* noalias nocapture %dst, i16* noalias nocapture readonly %src, i64 %N) #0 {
+define void @u16_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @u16_to_f32(
 ; CHECK: vector.body
 ; CHECK:   %{{.*}} = uitofp <vscale x 8 x i16> %{{.*}} to <vscale x 8 x float>
@@ -127,11 +127,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i16, i16* %src, i64 %i.07
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %src, i64 %i.07
+  %0 = load i16, ptr %arrayidx, align 2
   %conv = uitofp i16 %0 to float
-  %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
-  store float %conv, float* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07
+  store float %conv, ptr %arrayidx1, align 4
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -141,7 +141,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 
-define void @u64_to_f16(half* noalias nocapture %dst, i64* noalias nocapture readonly %src, i64 %N) #0 {
+define void @u64_to_f16(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @u64_to_f16(
 ; CHECK:      vector.body
 ; CHECK:        %{{.*}} = uitofp <vscale x 8 x i64> %{{.*}} to <vscale x 8 x half>
@@ -150,11 +150,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.08
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.08
+  %0 = load i64, ptr %arrayidx, align 8
   %conv1 = uitofp i64 %0 to half
-  %arrayidx2 = getelementptr inbounds half, half* %dst, i64 %i.08
-  store half %conv1, half* %arrayidx2, align 2
+  %arrayidx2 = getelementptr inbounds half, ptr %dst, i64 %i.08
+  store half %conv1, ptr %arrayidx2, align 2
   %inc = add nuw nsw i64 %i.08, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -164,7 +164,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 
-define void @s64_to_f16(half* noalias nocapture %dst, i64* noalias nocapture readonly %src, i64 %N) #0 {
+define void @s64_to_f16(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @s64_to_f16(
 ; CHECK:      vector.body
 ; CHECK:        %{{.*}} = sitofp <vscale x 8 x i64> %{{.*}} to <vscale x 8 x half>
@@ -173,11 +173,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.08
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.08
+  %0 = load i64, ptr %arrayidx, align 8
   %conv1 = sitofp i64 %0 to half
-  %arrayidx2 = getelementptr inbounds half, half* %dst, i64 %i.08
-  store half %conv1, half* %arrayidx2, align 2
+  %arrayidx2 = getelementptr inbounds half, ptr %dst, i64 %i.08
+  store half %conv1, ptr %arrayidx2, align 2
   %inc = add nuw nsw i64 %i.08, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -187,7 +187,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 
-define void @s8_to_s32(i32* noalias nocapture %dst, i8* noalias nocapture readonly %src, i64 %N) #0 {
+define void @s8_to_s32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @s8_to_s32(
 ; CHECK: vector.body
 ; CHECK:   %{{.*}} = sext <vscale x 8 x i8> %{{.*}} to <vscale x 8 x i32>
@@ -196,11 +196,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.07
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.07
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = sext i8 %0 to i32
-  %arrayidx1 = getelementptr inbounds i32, i32* %dst, i64 %i.07
-  store i32 %conv, i32* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %dst, i64 %i.07
+  store i32 %conv, ptr %arrayidx1, align 4
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -210,7 +210,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 
-define void @u8_to_u16(i16* noalias nocapture %dst, i8* noalias nocapture readonly %src, i64 %N) #0 {
+define void @u8_to_u16(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @u8_to_u16(
 ; CHECK: vector.body
 ; CHECK:   %{{.*}} = zext <vscale x 8 x i8> %{{.*}} to <vscale x 8 x i16>
@@ -219,11 +219,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.07
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.07
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i16
-  %arrayidx1 = getelementptr inbounds i16, i16* %dst, i64 %i.07
-  store i16 %conv, i16* %arrayidx1, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %dst, i64 %i.07
+  store i16 %conv, ptr %arrayidx1, align 2
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -233,7 +233,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 
-define void @s64_to_s8(i8* noalias nocapture %dst, i64* noalias nocapture readonly %src, i64 %N) #0 {
+define void @s64_to_s8(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @s64_to_s8(
 ; CHECK: vector.body
 ; CHECK:   %{{.*}} = trunc <vscale x 8 x i64> %{{.*}} to <vscale x 8 x i8>
@@ -242,11 +242,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.07
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.07
+  %0 = load i64, ptr %arrayidx, align 8
   %conv = trunc i64 %0 to i8
-  %arrayidx1 = getelementptr inbounds i8, i8* %dst, i64 %i.07
-  store i8 %conv, i8* %arrayidx1, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.07
+  store i8 %conv, ptr %arrayidx1, align 1
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
index a40ff862946ff..34e2f344545be 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
@@ -16,13 +16,13 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"
 
-define void @vector_reverse_mask_nxv4i1(double* %a, double* %cond, i64 %N) #0 {
+define void @vector_reverse_mask_nxv4i1(ptr %a, ptr %cond, i64 %N) #0 {
 ; CHECK-LABEL: vector.body:
 ; CHECK: %[[REVERSE6:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
-; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0nxv4f64(<vscale x 4 x double>* %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
+; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
 ; CHECK-NEXT: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[WIDEMSKLOAD]]
 ; CHECK:  %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
-; CHECK: call void @llvm.masked.store.nxv4f64.p0nxv4f64(<vscale x 4 x double> %[[FADD]], <vscale x 4 x double>* %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
+; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[FADD]], ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
 
 entry:
   %cmp7 = icmp sgt i64 %N, 0
@@ -34,16 +34,16 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup, %
 for.body:                                         ; preds = %for.body, %entry
   %i.08.in = phi i64 [ %i.08, %for.inc ], [ %N, %entry ]
   %i.08 = add nsw i64 %i.08.in, -1
-  %arrayidx = getelementptr inbounds double, double* %cond, i64 %i.08
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %cond, i64 %i.08
+  %0 = load double, ptr %arrayidx, align 8
   %tobool = fcmp une double %0, 0.000000e+00
   br i1 %tobool, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.08
-  %1 = load double, double* %arrayidx1, align 8
+  %arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.08
+  %1 = load double, ptr %arrayidx1, align 8
   %add = fadd double %1, 1.000000e+00
-  store double %add, double* %arrayidx1, align 8
+  store double %add, ptr %arrayidx1, align 8
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
index cc51c14594a9b..003a3f900fd43 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -8,7 +8,7 @@
 ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -S \
 ; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s
 
-define void @vector_reverse_f64(i64 %N, double* noalias %a, double* noalias %b) #0{
+define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
 ; CHECK-LABEL: @vector_reverse_f64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0
@@ -28,23 +28,21 @@ define void @vector_reverse_f64(i64 %N, double* noalias %a, double* noalias %b)
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[INDEX]], -1
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], [[N]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP8:%.*]] = shl i32 [[TMP7]], 3
 ; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 1, [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <vscale x 8 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, <vscale x 8 x double>* [[TMP12]], align 8
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP10]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP16:%.*]] = shl i32 [[TMP15]], 3
 ; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 1, [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 [[TMP18]]
-; CHECK-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP19]] to <vscale x 8 x double>*
-; CHECK-NEXT:    store <vscale x 8 x double> [[TMP14]], <vscale x 8 x double>* [[TMP20]], align 8
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 [[TMP18]]
+; CHECK-NEXT:    store <vscale x 8 x double> [[TMP14]], ptr [[TMP19]], align 8
 ; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP22:%.*]] = shl i64 [[TMP21]], 3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
@@ -63,11 +61,11 @@ define void @vector_reverse_f64(i64 %N, double* noalias %a, double* noalias %b)
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I_08]] = add nsw i64 [[I_08_IN]], -1
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]]
+; CHECK-NEXT:    [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP24]], 1.000000e+00
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[I_08]]
-; CHECK-NEXT:    store double [[ADD]], double* [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]]
+; CHECK-NEXT:    store double [[ADD]], ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
 ;
@@ -81,21 +79,21 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %entry, %for.body
   %i.08.in = phi i64 [ %i.08, %for.body ], [ %N, %entry ]
   %i.08 = add nsw i64 %i.08.in, -1
-  %arrayidx = getelementptr inbounds double, double* %b, i64 %i.08
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %b, i64 %i.08
+  %0 = load double, ptr %arrayidx, align 8
   %add = fadd double %0, 1.000000e+00
-  %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.08
-  store double %add, double* %arrayidx1, align 8
+  %arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.08
+  store double %add, ptr %arrayidx1, align 8
   %cmp = icmp sgt i64 %i.08.in, 1
   br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0
 }
 
 
-define void @vector_reverse_i64(i64 %N, i64* %a, i64* %b) #0 {
+define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-LABEL: @vector_reverse_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A2:%.*]] = ptrtoint i64* [[A:%.*]] to i64
-; CHECK-NEXT:    [[B1:%.*]] = ptrtoint i64* [[B:%.*]] to i64
+; CHECK-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B:%.*]] to i64
 ; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i64 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; CHECK:       for.body.preheader:
@@ -122,23 +120,21 @@ define void @vector_reverse_i64(i64 %N, i64* %a, i64* %b) #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP10:%.*]] = xor i64 [[INDEX]], -1
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], [[N]]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP14:%.*]] = shl i32 [[TMP13]], 3
 ; CHECK-NEXT:    [[TMP15:%.*]] = sub i32 1, [[TMP14]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i64 [[TMP16]]
-; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i64* [[TMP17]] to <vscale x 8 x i64>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, <vscale x 8 x i64>* [[TMP18]], align 8
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP17]], align 8
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP22:%.*]] = shl i32 [[TMP21]], 3
 ; CHECK-NEXT:    [[TMP23:%.*]] = sub i32 1, [[TMP22]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, i64* [[TMP19]], i64 [[TMP24]]
-; CHECK-NEXT:    [[TMP26:%.*]] = bitcast i64* [[TMP25]] to <vscale x 8 x i64>*
-; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP20]], <vscale x 8 x i64>* [[TMP26]], align 8
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i64 [[TMP24]]
+; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP20]], ptr [[TMP25]], align 8
 ; CHECK-NEXT:    [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP28:%.*]] = shl i64 [[TMP27]], 3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]]
@@ -157,11 +153,11 @@ define void @vector_reverse_i64(i64 %N, i64* %a, i64* %b) #0 {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I_09]] = add nsw i64 [[I_09_IN]], -1
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[I_09]]
-; CHECK-NEXT:    [[TMP30:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_09]]
+; CHECK-NEXT:    [[TMP30:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP30]], 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I_09]]
-; CHECK-NEXT:    store i64 [[ADD]], i64* [[ARRAYIDX2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_09]]
+; CHECK-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]]
 ;
@@ -175,11 +171,11 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %entry, %for.body
   %i.09.in = phi i64 [ %i.09, %for.body ], [ %N, %entry ]
   %i.09 = add nsw i64 %i.09.in, -1
-  %arrayidx = getelementptr inbounds i64, i64* %b, i64 %i.09
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %b, i64 %i.09
+  %0 = load i64, ptr %arrayidx, align 8
   %add = add i64 %0, 1
-  %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %i.09
-  store i64 %add, i64* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds i64, ptr %a, i64 %i.09
+  store i64 %add, ptr %arrayidx2, align 8
   %cmp = icmp sgt i64 %i.09.in, 1
   br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0
 }

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index 3622e358fc334..d9acd499ff0a1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -2,7 +2,7 @@
 
 target triple = "aarch64-unknown-linux-gnu"
 
-define void @widen_extractvalue(i64* %dst, {i64, i64} %sv) #0 {
+define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
 ; CHECK-LABEL: @widen_extractvalue(
 ; CHECK: vector.body:
 ; CHECK:        [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0
@@ -19,9 +19,9 @@ loop.body:
   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
   %a = extractvalue { i64, i64 } %sv, 0
   %b = extractvalue { i64, i64 } %sv, 1
-  %addr = getelementptr i64, i64* %dst, i32 %iv
+  %addr = getelementptr i64, ptr %dst, i32 %iv
   %add = add i64 %a, %b
-  store i64 %add, i64* %addr
+  store i64 %add, ptr %addr
   %iv.next = add nsw i32 %iv, 1
   %cond = icmp ne i32 %iv.next, 0
   br i1 %cond, label %loop.body, label %exit, !llvm.loop !0

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
index 837d4cc5fce70..7272a69e81731 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
@@ -4,7 +4,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-gnu"
 
 ; CHECK-LABEL: test0
-define void @test0(i16* noalias %M3) {
+define void @test0(ptr noalias %M3) {
 entry:
   br label %if.then1165.us
 
@@ -14,8 +14,8 @@ if.then1165.us:                                   ; preds = %if.then1165.us, %en
   %add1178.us = add nsw i32 %conv1177.us, undef
   %conv1179.us = trunc i32 %add1178.us to i16
   %idxprom1181.us = ashr exact i64 undef, 32
-  %arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us
-  store i16 %conv1179.us, i16* %arrayidx1185.us, align 2
+  %arrayidx1185.us = getelementptr inbounds i16, ptr %M3, i64 %idxprom1181.us
+  store i16 %conv1179.us, ptr %arrayidx1185.us, align 2
   %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1
   %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16
   br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us
@@ -25,19 +25,19 @@ for.inc1286.loopexit:                             ; preds = %if.then1165.us
 }
 
 ; CHECK-LABEL: test1
-define void @test1(i16* noalias %M3) {
+define void @test1(ptr noalias %M3) {
 entry:
   br label %if.then1165.us
 
 if.then1165.us:                                   ; preds = %if.then1165.us, %entry
   %indvars.iv1783 = phi i64 [ 0, %entry ], [ %indvars.iv.next1784, %if.then1165.us ]
-  %fptr = load i32, i32* undef, align 4
+  %fptr = load i32, ptr undef, align 4
   %conv1177.us = zext i16 undef to i32
   %add1178.us = add nsw i32 %conv1177.us, %fptr
   %conv1179.us = trunc i32 %add1178.us to i16
   %idxprom1181.us = ashr exact i64 undef, 32
-  %arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us
-  store i16 %conv1179.us, i16* %arrayidx1185.us, align 2
+  %arrayidx1185.us = getelementptr inbounds i16, ptr %M3, i64 %idxprom1181.us
+  store i16 %conv1179.us, ptr %arrayidx1185.us, align 2
   %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1
   %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16
   br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll b/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll
index fb12e9c7c6b7a..9b635bf117ad5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll
@@ -9,24 +9,24 @@
 ; CHECK: LV: Selecting VF: 2.
 ; CHECK-LABEL: @test
 ; CHECK: <2 x i64>
-define void @test(i64* nocapture %a, i64* nocapture readonly %b) {
+define void @test(ptr nocapture %a, ptr nocapture readonly %b) {
 entry:
   br label %loop.header
 
 loop.header:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv
-  %0 = load i64, i64* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 %iv
-  %1 = load i64, i64* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
+  %0 = load i64, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 %iv
+  %1 = load i64, ptr %arrayidx2, align 4
   %add = add nsw i64 %1, %0
   %2 = add nuw nsw i64 %iv, 16
-  %arrayidx5 = getelementptr inbounds i64, i64* %a, i64 %2
+  %arrayidx5 = getelementptr inbounds i64, ptr %a, i64 %2
   %c = icmp eq i64 %1, 120
   br i1 %c, label %then, label %latch
 
 then:
-  store i64 %add, i64* %arrayidx5, align 4
+  store i64 %add, ptr %arrayidx5, align 4
   br label %latch
 
 latch:

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll
index 6cfdafe8f8ebd..c1e27ca468f37 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "arm64-apple-darwin"
 
 declare float @expf(float) nounwind readnone
-define void @expf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @expf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @expf_v4f32(
 ; CHECK: call <4 x float> @_simd_exp_f4(
 ; CHECK: ret void
@@ -14,11 +14,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @expf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -28,7 +28,7 @@ for.end:
 }
 
 declare double @exp(double) nounwind readnone
-define void @exp_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @exp_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @exp_v2f64(
 ; CHECK: call <2 x double> @_simd_exp_d2(
 ; CHECK: ret void
@@ -38,11 +38,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @exp(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -52,7 +52,7 @@ for.end:
 }
 
 declare float @acosf(float) nounwind readnone
-define void @acos_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @acos_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @acos_v4f32(
 ; CHECK: call <4 x float> @_simd_acos_f4(
 ; CHECK: ret void
@@ -62,11 +62,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @acosf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -76,7 +76,7 @@ for.end:
 }
 
 declare double @acos(double) nounwind readnone
-define void @acos_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @acos_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @acos_v2f64(
 ; CHECK: call <2 x double> @_simd_acos_d2(
 ; CHECK: ret void
@@ -86,11 +86,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @acos(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -100,7 +100,7 @@ for.end:
 }
 
 declare float @asinf(float) nounwind readnone
-define void @asinf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @asinf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @asinf_v4f32(
 ; CHECK: call <4 x float> @_simd_asin_f4(
 ; CHECK: ret void
@@ -110,11 +110,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @asinf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -124,7 +124,7 @@ for.end:
 }
 
 declare double @asin(double) nounwind readnone
-define void @asin_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @asin_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @asin_v2f64(
 ; CHECK: call <2 x double> @_simd_asin_d2(
 ; CHECK: ret void
@@ -134,11 +134,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @asin(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -148,7 +148,7 @@ for.end:
 }
 
  declare float @atanf(float) nounwind readnone
-define void @atanf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @atanf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @atanf_v4f32(
 ; CHECK: call <4 x float> @_simd_atan_f4(
 ; CHECK: ret void
@@ -158,11 +158,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @atanf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -172,7 +172,7 @@ for.end:
 }
 
 declare double @atan(double) nounwind readnone
-define void @atan_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @atan_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @atan_v2f64(
 ; CHECK: call <2 x double> @_simd_atan_d2(
 ; CHECK: ret void
@@ -182,11 +182,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @atan(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -196,7 +196,7 @@ for.end:
 }
 
 declare float @atan2f(float) nounwind readnone
-define void @atan2f_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @atan2f_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @atan2f_v4f32(
 ; CHECK: call <4 x float> @_simd_atan2_f4(
 ; CHECK: ret void
@@ -206,11 +206,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @atan2f(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -220,7 +220,7 @@ for.end:
 }
 
 declare double @atan2(double) nounwind readnone
-define void @atan2_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @atan2_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @atan2_v2f64(
 ; CHECK: call <2 x double> @_simd_atan2_d2(
 ; CHECK: ret void
@@ -230,11 +230,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @atan2(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -244,7 +244,7 @@ for.end:
 }
 
 declare float @cosf(float) nounwind readnone
-define void @cosf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @cosf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @cosf_v4f32(
 ; CHECK: call <4 x float> @_simd_cos_f4(
 ; CHECK: ret void
@@ -254,11 +254,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @cosf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -268,7 +268,7 @@ for.end:
 }
 
 declare double @cos(double) nounwind readnone
-define void @cos_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @cos_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @cos_v2f64(
 ; CHECK: call <2 x double> @_simd_cos_d2(
 ; CHECK: ret void
@@ -278,11 +278,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @cos(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -292,7 +292,7 @@ for.end:
 }
 
 declare float @cbrtf(float) nounwind readnone
-define void @cbrtf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @cbrtf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @cbrtf_v4f32(
 ; CHECK: call <4 x float> @_simd_cbrt_f4(
 ; CHECK: ret void
@@ -302,11 +302,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @cbrtf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -316,7 +316,7 @@ for.end:
 }
 
 declare double @cbrt(double) nounwind readnone
-define void @cbrt_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @cbrt_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @cbrt_v2f64(
 ; CHECK: call <2 x double> @_simd_cbrt_d2(
 ; CHECK: ret void
@@ -326,11 +326,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @cbrt(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -340,7 +340,7 @@ for.end:
 }
 
 declare float @erff(float) nounwind readnone
-define void @erff_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @erff_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @erff_v4f32(
 ; CHECK: call <4 x float> @_simd_erf_f4(
 ; CHECK: ret void
@@ -350,11 +350,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @erff(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -364,7 +364,7 @@ for.end:
 }
 
 declare double @erf(double) nounwind readnone
-define void @erf_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @erf_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @erf_v2f64(
 ; CHECK: call <2 x double> @_simd_erf_d2(
 ; CHECK: ret void
@@ -374,11 +374,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @erf(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -388,7 +388,7 @@ for.end:
 }
 
 declare float @powf(float) nounwind readnone
-define void @powf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @powf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @powf_v4f32(
 ; CHECK: call <4 x float> @_simd_pow_f4(
 ; CHECK: ret void
@@ -398,11 +398,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @powf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -412,7 +412,7 @@ for.end:
 }
 
 declare double @pow(double) nounwind readnone
-define void @pow_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @pow_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @pow_v2f64(
 ; CHECK: call <2 x double> @_simd_pow_d2(
 ; CHECK: ret void
@@ -422,11 +422,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @pow(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -436,7 +436,7 @@ for.end:
 }
 
 declare float @sinhf(float) nounwind readnone
-define void @sinhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @sinhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @sinhf_v4f32(
 ; CHECK: call <4 x float> @_simd_sinh_f4(
 ; CHECK: ret void
@@ -446,11 +446,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @sinhf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -460,7 +460,7 @@ for.end:
 }
 
 declare double @sinh(double) nounwind readnone
-define void @sinh_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @sinh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @sinh_v2f64(
 ; CHECK: call <2 x double> @_simd_sinh_d2(
 ; CHECK: ret void
@@ -470,11 +470,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @sinh(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -484,7 +484,7 @@ for.end:
 }
 
 declare float @coshf(float) nounwind readnone
-define void @coshf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @coshf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @coshf_v4f32(
 ; CHECK: call <4 x float> @_simd_cosh_f4(
 ; CHECK: ret void
@@ -494,11 +494,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @coshf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -508,7 +508,7 @@ for.end:
 }
 
 declare double @cosh(double) nounwind readnone
-define void @cosh_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @cosh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @cosh_v2f64(
 ; CHECK: call <2 x double> @_simd_cosh_d2(
 ; CHECK: ret void
@@ -518,11 +518,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @cosh(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -532,7 +532,7 @@ for.end:
 }
 
 declare float @tanhf(float) nounwind readnone
-define void @tanhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @tanhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @tanhf_v4f32(
 ; CHECK: call <4 x float> @_simd_tanh_f4(
 ; CHECK: ret void
@@ -542,11 +542,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @tanhf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -556,7 +556,7 @@ for.end:
 }
 
 declare double @tanh(double) nounwind readnone
-define void @tanh_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @tanh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @tanh_v2f64(
 ; CHECK: call <2 x double> @_simd_tanh_d2(
 ; CHECK: ret void
@@ -566,11 +566,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @tanh(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -580,7 +580,7 @@ for.end:
 }
 
 declare float @asinhf(float) nounwind readnone
-define void @asinhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @asinhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @asinhf_v4f32(
 ; CHECK: call <4 x float> @_simd_asinh_f4(
 ; CHECK: ret void
@@ -590,11 +590,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @asinhf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -604,7 +604,7 @@ for.end:
 }
 
 declare double @asinh(double) nounwind readnone
-define void @asinh_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @asinh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @asinh_v2f64(
 ; CHECK: call <2 x double> @_simd_asinh_d2(
 ; CHECK: ret void
@@ -614,11 +614,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @asinh(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -628,7 +628,7 @@ for.end:
 }
 
 declare float @acoshf(float) nounwind readnone
-define void @acoshf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @acoshf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @acoshf_v4f32(
 ; CHECK: call <4 x float> @_simd_acosh_f4(
 ; CHECK: ret void
@@ -638,11 +638,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @acoshf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -652,7 +652,7 @@ for.end:
 }
 
 declare double @acosh(double) nounwind readnone
-define void @acosh_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @acosh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @acosh_v2f64(
 ; CHECK: call <2 x double> @_simd_acosh_d2(
 ; CHECK: ret void
@@ -662,11 +662,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @acosh(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -676,7 +676,7 @@ for.end:
 }
 
 declare float @atanhf(float) nounwind readnone
-define void @atanhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) {
+define void @atanhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @atanhf_v4f32(
 ; CHECK: call <4 x float> @_simd_atanh_f4(
 ; CHECK: ret void
@@ -686,11 +686,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %gep.y, align 4
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %gep.y, align 4
   %call = tail call float @atanhf(float %lv)
-  %gep.x = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %gep.x, align 4
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -700,7 +700,7 @@ for.end:
 }
 
 declare double @atanh(double) nounwind readnone
-define void @atanh_v2f64(i64 %n, double* noalias %y, double * noalias %x) {
+define void @atanh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) {
 ; CHECK-LABEL: @atanh_v2f64(
 ; CHECK: call <2 x double> @_simd_atanh_d2(
 ; CHECK: ret void
@@ -710,11 +710,11 @@ entry:
 
 for.body:
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %gep.y = getelementptr inbounds double, double* %y, i64 %iv
-  %lv = load double, double* %gep.y, align 4
+  %gep.y = getelementptr inbounds double, ptr %y, i64 %iv
+  %lv = load double, ptr %gep.y, align 4
   %call = tail call double @atanh(double %lv)
-  %gep.x = getelementptr inbounds double, double* %x, i64 %iv
-  store double %call, double* %gep.x, align 4
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %iv
+  store double %call, ptr %gep.x, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
index 26511c1910ee8..ab50b3281c020 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
@@ -17,7 +17,7 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"
 
-define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond, i64 %N) #0 {
+define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N) #0 {
 ; CHECK-LABEL: @vector_reverse_mask_v4i1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0
@@ -33,34 +33,28 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond,
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = xor i64 [[INDEX]], -1
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[N]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, double* [[COND:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -3
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 -3
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 -4
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 -3
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP8:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fcmp une <4 x double> [[REVERSE2]], zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr double, double* [[TMP10]], i64 -3
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 -3
 ; CHECK-NEXT:    [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr double, double* [[TMP10]], i64 -4
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP11]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 -4
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, ptr [[TMP13]], i64 -3
 ; CHECK-NEXT:    [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP14]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
 ; CHECK-NEXT:    [[TMP16:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
 ; CHECK-NEXT:    [[TMP17:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT:    [[TMP18:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP16]], <4 x double>* [[TMP18]], i32 8, <4 x i1> [[REVERSE3]])
-; CHECK-NEXT:    [[TMP19:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP17]], <4 x double>* [[TMP19]], i32 8, <4 x i1> [[REVERSE5]])
+; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP16]], ptr [[TMP11]], i32 8, <4 x i1> [[REVERSE3]])
+; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP17]], ptr [[TMP14]], i32 8, <4 x i1> [[REVERSE5]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -77,15 +71,15 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond,
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I_08]] = add nsw i64 [[I_08_IN]], -1
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[COND]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load double, double* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[COND]], i64 [[I_08]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp une double [[TMP21]], 0.000000e+00
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP22:%.*]] = load double, double* [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]]
+; CHECK-NEXT:    [[TMP22:%.*]] = load double, ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP22]], 1.000000e+00
-; CHECK-NEXT:    store double [[ADD]], double* [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    store double [[ADD]], ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1
@@ -102,16 +96,16 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup, %
 for.body:                                         ; preds = %for.body, %entry
   %i.08.in = phi i64 [ %i.08, %for.inc ], [ %N, %entry ]
   %i.08 = add nsw i64 %i.08.in, -1
-  %arrayidx = getelementptr inbounds double, double* %cond, i64 %i.08
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %cond, i64 %i.08
+  %0 = load double, ptr %arrayidx, align 8
   %tobool = fcmp une double %0, 0.000000e+00
   br i1 %tobool, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.08
-  %1 = load double, double* %arrayidx1, align 8
+  %arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.08
+  %1 = load double, ptr %arrayidx1, align 8
   %add = fadd double %1, 1.000000e+00
-  store double %add, double* %arrayidx1, align 8
+  store double %add, ptr %arrayidx1, align 8
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then

diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll
index cf7247fb52dbd..b1f2ccd556e21 100644
--- a/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll
+++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll
@@ -6,17 +6,17 @@
 ; GCN-NOT: store <2 x half>
 
 ; REMARK: remark: <unknown>:0:0: loop not vectorized: runtime pointer checks needed. Not enabled for divergent target
-define amdgpu_kernel void @runtime_check_divergent_target(half addrspace(1)* nocapture %a, half addrspace(1)* nocapture %b) #0 {
+define amdgpu_kernel void @runtime_check_divergent_target(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture %b) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds half, half addrspace(1)* %b, i64 %indvars.iv
-  %load = load half, half addrspace(1)* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds half, ptr addrspace(1) %b, i64 %indvars.iv
+  %load = load half, ptr addrspace(1) %arrayidx, align 4
   %mul = fmul half %load, 3.0
-  %arrayidx2 = getelementptr inbounds half, half addrspace(1)* %a, i64 %indvars.iv
-  store half %mul, half addrspace(1)* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds half, ptr addrspace(1) %a, i64 %indvars.iv
+  store half %mul, ptr addrspace(1) %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024

diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll
index 26da04e0b10d9..e43a14bb47603 100644
--- a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll
+++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll
@@ -4,15 +4,15 @@
 ; GFX90A-COUNT-2: load <2 x float>
 ; GFX90A-COUNT-2: fadd fast <2 x float>
 
-define float @vectorize_v2f32_loop(float addrspace(1)* noalias %s) {
+define float @vectorize_v2f32_loop(ptr addrspace(1) noalias %s) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %q.04 = phi float [ 0.0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float addrspace(1)* %s, i64 %indvars.iv
-  %load = load float, float addrspace(1)* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %s, i64 %indvars.iv
+  %load = load float, ptr addrspace(1) %arrayidx, align 4
   %add = fadd fast float %q.04, %load
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 256

diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
index 626e2f505b9d1..b29abbd78ed73 100644
--- a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
+++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
@@ -3,7 +3,7 @@
 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s -passes=loop-vectorize,dce,instcombine -S | FileCheck -check-prefix=VI %s
 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s -passes=loop-vectorize,dce,instcombine -S | FileCheck -check-prefix=CI %s
 
-define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) {
+define half @vectorize_v2f16_loop(ptr addrspace(1) noalias %s) {
 ; GFX9-LABEL: @vectorize_v2f16_loop(
 ; GFX9-NEXT:  entry:
 ; GFX9-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -13,12 +13,10 @@ define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) {
 ; GFX9-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; GFX9-NEXT:    [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; GFX9-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; GFX9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds half, half addrspace(1)* [[S:%.*]], i64 [[INDEX]]
-; GFX9-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(1)* [[TMP0]] to <2 x half> addrspace(1)*
-; GFX9-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP1]], align 2
-; GFX9-NEXT:    [[TMP2:%.*]] = getelementptr inbounds half, half addrspace(1)* [[TMP0]], i64 2
-; GFX9-NEXT:    [[TMP3:%.*]] = bitcast half addrspace(1)* [[TMP2]] to <2 x half> addrspace(1)*
-; GFX9-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP3]], align 2
+; GFX9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDEX]]
+; GFX9-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 2
+; GFX9-NEXT:    [[TMP2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP0]], i64 2
+; GFX9-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP2]], align 2
 ; GFX9-NEXT:    [[TMP4]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]]
 ; GFX9-NEXT:    [[TMP5]] = fadd fast <2 x half> [[VEC_PHI1]], [[WIDE_LOAD2]]
 ; GFX9-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -45,12 +43,10 @@ define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) {
 ; VI-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VI-NEXT:    [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; VI-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; VI-NEXT:    [[TMP0:%.*]] = getelementptr inbounds half, half addrspace(1)* [[S:%.*]], i64 [[INDEX]]
-; VI-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(1)* [[TMP0]] to <2 x half> addrspace(1)*
-; VI-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP1]], align 2
-; VI-NEXT:    [[TMP2:%.*]] = getelementptr inbounds half, half addrspace(1)* [[TMP0]], i64 2
-; VI-NEXT:    [[TMP3:%.*]] = bitcast half addrspace(1)* [[TMP2]] to <2 x half> addrspace(1)*
-; VI-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP3]], align 2
+; VI-NEXT:    [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDEX]]
+; VI-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 2
+; VI-NEXT:    [[TMP2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP0]], i64 2
+; VI-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP2]], align 2
 ; VI-NEXT:    [[TMP4]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]]
 ; VI-NEXT:    [[TMP5]] = fadd fast <2 x half> [[VEC_PHI1]], [[WIDE_LOAD2]]
 ; VI-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -74,8 +70,8 @@ define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) {
 ; CI:       for.body:
 ; CI-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CI-NEXT:    [[Q_04:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CI-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds half, half addrspace(1)* [[S:%.*]], i64 [[INDVARS_IV]]
-; CI-NEXT:    [[TMP0:%.*]] = load half, half addrspace(1)* [[ARRAYIDX]], align 2
+; CI-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDVARS_IV]]
+; CI-NEXT:    [[TMP0:%.*]] = load half, ptr addrspace(1) [[ARRAYIDX]], align 2
 ; CI-NEXT:    [[ADD]] = fadd fast half [[Q_04]], [[TMP0]]
 ; CI-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CI-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256
@@ -89,8 +85,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %q.04 = phi half [ 0.0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds half, half addrspace(1)* %s, i64 %indvars.iv
-  %0 = load half, half addrspace(1)* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds half, ptr addrspace(1) %s, i64 %indvars.iv
+  %0 = load half, ptr addrspace(1) %arrayidx, align 2
   %add = fadd fast half %q.04, %0
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 256

diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll
index 528d06f886ad3..ac67257fb2a40 100644
--- a/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll
+++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll
@@ -7,17 +7,17 @@
 ; CHECK: store i32
 ; CHECK-NOT: store i32
 ; CHECK: ret
-define amdgpu_kernel void @small_loop(i32* nocapture %inArray, i32 %size) nounwind {
+define amdgpu_kernel void @small_loop(ptr nocapture %inArray, i32 %size) nounwind {
 entry:
   %0 = icmp sgt i32 %size, 0
   br i1 %0, label %loop, label %exit
 
 loop:                                          ; preds = %entry, %loop
   %iv = phi i32 [ %iv1, %loop ], [ 0, %entry ]
-  %1 = getelementptr inbounds i32, i32* %inArray, i32 %iv
-  %2 = load i32, i32* %1, align 4
+  %1 = getelementptr inbounds i32, ptr %inArray, i32 %iv
+  %2 = load i32, ptr %1, align 4
   %3 = add nsw i32 %2, 6
-  store i32 %3, i32* %1, align 4
+  store i32 %3, ptr %1, align 4
   %iv1 = add i32 %iv, 1
 ;  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %cond = icmp eq i32 %iv1, %size

diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll
index 151919f89d8ed..aee0aa4b271f0 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll
@@ -16,7 +16,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 ; Integer loops are always vectorizeable
 ; CHECK: Checking a loop in 'sumi'
 ; CHECK: We can vectorize this loop!
-define void @sumi(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %N) {
+define void @sumi(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) {
 entry:
   %cmp5 = icmp eq i32 %N, 0
   br i1 %cmp5, label %for.end, label %for.body.preheader
@@ -26,13 +26,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.06
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %B, i32 %i.06
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.06
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %B, i32 %i.06
+  %1 = load i32, ptr %arrayidx1, align 4
   %mul = mul nsw i32 %1, %0
-  %arrayidx2 = getelementptr inbounds i32, i32* %C, i32 %i.06
-  store i32 %mul, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %C, i32 %i.06
+  store i32 %mul, ptr %arrayidx2, align 4
   %inc = add nuw nsw i32 %i.06, 1
   %exitcond = icmp eq i32 %inc, %N
   br i1 %exitcond, label %for.end.loopexit, label %for.body
@@ -51,7 +51,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; MVE: We can vectorize this loop!
 ; DARWIN: Checking a loop in 'sumf'
 ; DARWIN: We can vectorize this loop!
-define void @sumf(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) {
+define void @sumf(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) {
 entry:
   %cmp5 = icmp eq i32 %N, 0
   br i1 %cmp5, label %for.end, label %for.body.preheader
@@ -61,13 +61,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds float, float* %A, i32 %i.06
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.06
-  %1 = load float, float* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.06
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.06
+  %1 = load float, ptr %arrayidx1, align 4
   %mul = fmul float %0, %1
-  %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.06
-  store float %mul, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %C, i32 %i.06
+  store float %mul, ptr %arrayidx2, align 4
   %inc = add nuw nsw i32 %i.06, 1
   %exitcond = icmp eq i32 %inc, %N
   br i1 %exitcond, label %for.end.loopexit, label %for.body
@@ -82,7 +82,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; Integer loops are always vectorizeable
 ; CHECK: Checking a loop in 'redi'
 ; CHECK: We can vectorize this loop!
-define i32 @redi(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
+define i32 @redi(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) {
 entry:
   %cmp5 = icmp eq i32 %N, 0
   br i1 %cmp5, label %for.end, label %for.body.preheader
@@ -93,10 +93,10 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %Red.06 = phi i32 [ %add, %for.body ], [ undef, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.07
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.07
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.07
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %b, i32 %i.07
+  %1 = load i32, ptr %arrayidx1, align 4
   %mul = mul nsw i32 %1, %0
   %add = add nsw i32 %mul, %Red.06
   %inc = add nuw nsw i32 %i.07, 1
@@ -119,7 +119,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; MVE: We can vectorize this loop!
 ; DARWIN: Checking a loop in 'redf'
 ; DARWIN: We can vectorize this loop!
-define float @redf(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i32 %N) {
+define float @redf(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) {
 entry:
   %cmp5 = icmp eq i32 %N, 0
   br i1 %cmp5, label %for.end, label %for.body.preheader
@@ -130,10 +130,10 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %Red.06 = phi float [ %add, %for.body ], [ undef, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds float, float* %a, i32 %i.07
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %b, i32 %i.07
-  %1 = load float, float* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i32 %i.07
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %b, i32 %i.07
+  %1 = load float, ptr %arrayidx1, align 4
   %mul = fmul float %0, %1
   %add = fadd float %Red.06, %mul
   %inc = add nuw nsw i32 %i.07, 1
@@ -154,21 +154,21 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; LINUX: Potentially unsafe FP op prevents vectorization
 ; DARWIN: Checking a loop in 'fabs'
 ; DARWIN: We can vectorize this loop!
-define void @fabs(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) {
+define void @fabs(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) {
 entry:
   %cmp10 = icmp eq i32 %N, 0
   br i1 %cmp10, label %for.end, label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %A, i32 %i.011
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.011
-  %1 = load float, float* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.011
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.011
+  %1 = load float, ptr %arrayidx1, align 4
   %fabsf = tail call float @fabsf(float %1) #1
   %conv3 = fmul float %0, %fabsf
-  %arrayidx4 = getelementptr inbounds float, float* %C, i32 %i.011
-  store float %conv3, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %C, i32 %i.011
+  store float %conv3, ptr %arrayidx4, align 4
   %inc = add nuw nsw i32 %i.011, 1
   %exitcond = icmp eq i32 %inc, %N
   br i1 %exitcond, label %for.end, label %for.body
@@ -180,7 +180,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; Integer loops are always vectorizeable
 ; CHECK: Checking a loop in 'sumi_fast'
 ; CHECK: We can vectorize this loop!
-define void @sumi_fast(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %N) {
+define void @sumi_fast(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) {
 entry:
   %cmp5 = icmp eq i32 %N, 0
   br i1 %cmp5, label %for.end, label %for.body.preheader
@@ -190,13 +190,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.06
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %B, i32 %i.06
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.06
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %B, i32 %i.06
+  %1 = load i32, ptr %arrayidx1, align 4
   %mul = mul nsw i32 %1, %0
-  %arrayidx2 = getelementptr inbounds i32, i32* %C, i32 %i.06
-  store i32 %mul, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %C, i32 %i.06
+  store i32 %mul, ptr %arrayidx2, align 4
   %inc = add nuw nsw i32 %i.06, 1
   %exitcond = icmp eq i32 %inc, %N
   br i1 %exitcond, label %for.end.loopexit, label %for.body
@@ -211,7 +211,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; Floating-point loops can be vectorizeable with fast-math
 ; CHECK: Checking a loop in 'sumf_fast'
 ; CHECK: We can vectorize this loop!
-define void @sumf_fast(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) {
+define void @sumf_fast(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) {
 entry:
   %cmp5 = icmp eq i32 %N, 0
   br i1 %cmp5, label %for.end, label %for.body.preheader
@@ -221,13 +221,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds float, float* %A, i32 %i.06
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.06
-  %1 = load float, float* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.06
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.06
+  %1 = load float, ptr %arrayidx1, align 4
   %mul = fmul fast float %1, %0
-  %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.06
-  store float %mul, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %C, i32 %i.06
+  store float %mul, ptr %arrayidx2, align 4
   %inc = add nuw nsw i32 %i.06, 1
   %exitcond = icmp eq i32 %inc, %N
   br i1 %exitcond, label %for.end.loopexit, label %for.body
@@ -242,7 +242,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; Integer loops are always vectorizeable
 ; CHECK: Checking a loop in 'redi_fast'
 ; CHECK: We can vectorize this loop!
-define i32 @redi_fast(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
+define i32 @redi_fast(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) {
 entry:
   %cmp5 = icmp eq i32 %N, 0
   br i1 %cmp5, label %for.end, label %for.body.preheader
@@ -253,10 +253,10 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %Red.06 = phi i32 [ %add, %for.body ], [ undef, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.07
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.07
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.07
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %b, i32 %i.07
+  %1 = load i32, ptr %arrayidx1, align 4
   %mul = mul nsw i32 %1, %0
   %add = add nsw i32 %mul, %Red.06
   %inc = add nuw nsw i32 %i.07, 1
@@ -275,7 +275,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; Floating-point loops can be vectorizeable with fast-math
 ; CHECK: Checking a loop in 'redf_fast'
 ; CHECK: We can vectorize this loop!
-define float @redf_fast(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i32 %N) {
+define float @redf_fast(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) {
 entry:
   %cmp5 = icmp eq i32 %N, 0
   br i1 %cmp5, label %for.end, label %for.body.preheader
@@ -286,10 +286,10 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %Red.06 = phi float [ %add, %for.body ], [ undef, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds float, float* %a, i32 %i.07
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %b, i32 %i.07
-  %1 = load float, float* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i32 %i.07
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %b, i32 %i.07
+  %1 = load float, ptr %arrayidx1, align 4
   %mul = fmul fast float %1, %0
   %add = fadd fast float %mul, %Red.06
   %inc = add nuw nsw i32 %i.07, 1
@@ -308,21 +308,21 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; Make sure calls that turn into builtins are also covered
 ; CHECK: Checking a loop in 'fabs_fast'
 ; CHECK: We can vectorize this loop!
-define void @fabs_fast(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) {
+define void @fabs_fast(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) {
 entry:
   %cmp10 = icmp eq i32 %N, 0
   br i1 %cmp10, label %for.end, label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %A, i32 %i.011
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.011
-  %1 = load float, float* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.011
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.011
+  %1 = load float, ptr %arrayidx1, align 4
   %fabsf = tail call fast float @fabsf(float %1) #2
   %conv3 = fmul fast float %fabsf, %0
-  %arrayidx4 = getelementptr inbounds float, float* %C, i32 %i.011
-  store float %conv3, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %C, i32 %i.011
+  store float %conv3, ptr %arrayidx4, align 4
   %inc = add nuw nsw i32 %i.011, 1
   %exitcond = icmp eq i32 %inc, %N
   br i1 %exitcond, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
index b1c400a72dde0..9ad934784ced4 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
@@ -13,15 +13,15 @@ target triple = "thumbv7-apple-ios3.0.0"
 ;SWIFT: load <4 x i32>
 ;SWIFT: load <4 x i32>
 ;SWIFT: ret
-define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp {
+define i32 @foo(ptr nocapture %A, i32 %n) nounwind readonly ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ]
   %sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i32 %i.02
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i32 %i.02
+  %3 = load i32, ptr %2, align 4
   %4 = add nsw i32 %3, %sum.01
   %5 = add nsw i32 %i.02, 1
   %exitcond = icmp eq i32 %5, %n
@@ -36,7 +36,7 @@ define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp {
 ;SWIFTUNROLL-LABEL: @register_limit(
 ;SWIFTUNROLL: load i32
 ;SWIFTUNROLL-NOT: load i32
-define i32 @register_limit(i32* nocapture %A, i32 %n) {
+define i32 @register_limit(ptr nocapture %A, i32 %n) {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
@@ -48,8 +48,8 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) {
   %sum.04 = phi i32 [ %8, %.lr.ph ], [ 0, %0 ]
   %sum.05 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
   %sum.06 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i32 %i.02
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i32 %i.02
+  %3 = load i32, ptr %2, align 4
   %4 = add nsw i32 %3, %sum.01
   %5 = add nsw i32 %i.02, 1
   %6 = add nsw i32 %3, %sum.02

diff --git a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll
index 93f736ec40707..5b0a7fabc4757 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll
@@ -33,32 +33,32 @@ for.body:
   %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ]
   %add = add i32 %v.055, %offset
   %mul = mul i32 %add, 3
-  %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %mul
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i32 0, i32 %v.055
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds [1536 x float], ptr @src_data, i32 0, i32 %mul
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds [512 x float], ptr @kernel, i32 0, i32 %v.055
+  %1 = load float, ptr %arrayidx2, align 4
   %mul3 = fmul fast float %0, %1
-  %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i32 0, i32 %v.055
-  %2 = load float, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds [512 x float], ptr @kernel2, i32 0, i32 %v.055
+  %2 = load float, ptr %arrayidx4, align 4
   %mul5 = fmul fast float %mul3, %2
-  %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i32 0, i32 %v.055
-  %3 = load float, float* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds [512 x float], ptr @kernel3, i32 0, i32 %v.055
+  %3 = load float, ptr %arrayidx6, align 4
   %mul7 = fmul fast float %mul5, %3
-  %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i32 0, i32 %v.055
-  %4 = load float, float* %arrayidx8, align 4
+  %arrayidx8 = getelementptr inbounds [512 x float], ptr @kernel4, i32 0, i32 %v.055
+  %4 = load float, ptr %arrayidx8, align 4
   %mul9 = fmul fast float %mul7, %4
   %add10 = fadd fast float %r.057, %mul9
   %arrayidx.sum = add i32 %mul, 1
-  %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %arrayidx.sum
-  %5 = load float, float* %arrayidx11, align 4
+  %arrayidx11 = getelementptr inbounds [1536 x float], ptr @src_data, i32 0, i32 %arrayidx.sum
+  %5 = load float, ptr %arrayidx11, align 4
   %mul13 = fmul fast float %1, %5
   %mul15 = fmul fast float %2, %mul13
   %mul17 = fmul fast float %3, %mul15
   %mul19 = fmul fast float %4, %mul17
   %add20 = fadd fast float %g.056, %mul19
   %arrayidx.sum52 = add i32 %mul, 2
-  %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %arrayidx.sum52
-  %6 = load float, float* %arrayidx21, align 4
+  %arrayidx21 = getelementptr inbounds [1536 x float], ptr @src_data, i32 0, i32 %arrayidx.sum52
+  %6 = load float, ptr %arrayidx21, align 4
   %mul23 = fmul fast float %1, %6
   %mul25 = fmul fast float %2, %mul23
   %mul27 = fmul fast float %3, %mul25
@@ -81,8 +81,8 @@ for.end:
   %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
   %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ]
   %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ]
-  store i8 %r.0.lcssa, i8* @r_, align 4
-  store i8 %g.0.lcssa, i8* @g_, align 4
-  store i8 %b.0.lcssa, i8* @b_, align 4
+  store i8 %r.0.lcssa, ptr @r_, align 4
+  store i8 %g.0.lcssa, ptr @g_, align 4
+  store i8 %b.0.lcssa, ptr @b_, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
index def0f5df82bc8..cb8899cab4696 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
@@ -18,13 +18,13 @@ define void @example1() nounwind uwtable ssp {
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %5, %3
-  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %6, i32* %7, align 4
+  %7 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %6, ptr %7, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 256
@@ -39,16 +39,16 @@ define void @example1() nounwind uwtable ssp {
 ;CHECK: sext <4 x i16>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
-define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+define void @example10b(ptr noalias nocapture %sa, ptr noalias nocapture %sb, ptr noalias nocapture %sc, ptr noalias nocapture %ia, ptr noalias nocapture %ib, ptr noalias nocapture %ic) nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds i16, i16* %sb, i64 %indvars.iv
-  %3 = load i16, i16* %2, align 2
+  %2 = getelementptr inbounds i16, ptr %sb, i64 %indvars.iv
+  %3 = load i16, ptr %2, align 2
   %4 = sext i16 %3 to i32
-  %5 = getelementptr inbounds i32, i32* %ia, i64 %indvars.iv
-  store i32 %4, i32* %5, align 4
+  %5 = getelementptr inbounds i32, ptr %ia, i64 %indvars.iv
+  store i32 %4, ptr %5, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024

diff --git a/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
index 3142632e2bc2d..d08c16dcb3ed1 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
@@ -8,28 +8,28 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "armv8--linux-gnueabihf"
 
 %i8.2 = type {i8, i8}
-define void @i8_factor_2(%i8.2* %data, i64 %n) {
+define void @i8_factor_2(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
 ; VF_8-LABEL:  Checking a loop in 'i8_factor_2'
-; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1
 ; VF_16-LABEL: Checking a loop in 'i8_factor_2'
-; VF_16:         Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 2 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_16:         Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 2 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 1
-  %tmp2 = load i8, i8* %tmp0, align 1
-  %tmp3 = load i8, i8* %tmp1, align 1
-  store i8 0, i8* %tmp0, align 1
-  store i8 0, i8* %tmp1, align 1
+  %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load i8, ptr %tmp0, align 1
+  %tmp3 = load i8, ptr %tmp1, align 1
+  store i8 0, ptr %tmp0, align 1
+  store i8 0, ptr %tmp1, align 1
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -39,33 +39,33 @@ for.end:
 }
 
 %i16.2 = type {i16, i16}
-define void @i16_factor_2(%i16.2* %data, i64 %n) {
+define void @i16_factor_2(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
 ; VF_4-LABEL:  Checking a loop in 'i16_factor_2'
-; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2
 ; VF_8-LABEL:  Checking a loop in 'i16_factor_2'
-; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2
 ; VF_16-LABEL: Checking a loop in 'i16_factor_2'
-; VF_16:         Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_16:         Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 1
-  %tmp2 = load i16, i16* %tmp0, align 2
-  %tmp3 = load i16, i16* %tmp1, align 2
-  store i16 0, i16* %tmp0, align 2
-  store i16 0, i16* %tmp1, align 2
+  %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load i16, ptr %tmp0, align 2
+  %tmp3 = load i16, ptr %tmp1, align 2
+  store i16 0, ptr %tmp0, align 2
+  store i16 0, ptr %tmp1, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -75,38 +75,38 @@ for.end:
 }
 
 %i32.2 = type {i32, i32}
-define void @i32_factor_2(%i32.2* %data, i64 %n) {
+define void @i32_factor_2(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i32_factor_2'
-; VF_2:          Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_2:          Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4
 ; VF_4-LABEL:  Checking a loop in 'i32_factor_2'
-; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4
 ; VF_8-LABEL:  Checking a loop in 'i32_factor_2'
-; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4
 ; VF_16-LABEL: Checking a loop in 'i32_factor_2'
-; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 1
-  %tmp2 = load i32, i32* %tmp0, align 4
-  %tmp3 = load i32, i32* %tmp1, align 4
-  store i32 0, i32* %tmp0, align 4
-  store i32 0, i32* %tmp1, align 4
+  %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load i32, ptr %tmp0, align 4
+  %tmp3 = load i32, ptr %tmp1, align 4
+  store i32 0, ptr %tmp0, align 4
+  store i32 0, ptr %tmp1, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -116,28 +116,28 @@ for.end:
 }
 
 %half.2 = type {half, half}
-define void @half_factor_2(%half.2* %data, i64 %n) {
+define void @half_factor_2(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
 ; VF_4-LABEL: Checking a loop in 'half_factor_2'
-; VF_4:         Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_4-NEXT:    Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
+; VF_4:         Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, ptr %tmp0, align 2
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, ptr %tmp1, align 2
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_4-NEXT:    Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, ptr %tmp1, align 2
 ; VF_8-LABEL: Checking a loop in 'half_factor_2'
-; VF_8:         Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
-; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2
-; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_8-NEXT:    Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
+; VF_8:         Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, ptr %tmp0, align 2
+; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, ptr %tmp1, align 2
+; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_8-NEXT:    Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, ptr %tmp1, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %half.2, %half.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %half.2, %half.2* %data, i64 %i, i32 1
-  %tmp2 = load half, half* %tmp0, align 2
-  %tmp3 = load half, half* %tmp1, align 2
-  store half 0., half* %tmp0, align 2
-  store half 0., half* %tmp1, align 2
+  %tmp0 = getelementptr inbounds %half.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %half.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load half, ptr %tmp0, align 2
+  %tmp3 = load half, ptr %tmp1, align 2
+  store half 0., ptr %tmp0, align 2
+  store half 0., ptr %tmp1, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
index eccebdcd19f12..97aa2035ded55 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
@@ -10,38 +10,38 @@ target triple = "thumbv8.1m.main-none-eabi"
 ; Factor 2
 
 %i8.2 = type {i8, i8}
-define void @i8_factor_2(%i8.2* %data, i64 %n) #0 {
+define void @i8_factor_2(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i8_factor_2'
-; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store i8 0, ptr %tmp1, align 1
 ; VF_4-LABEL:  Checking a loop in 'i8_factor_2'
-; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i8 0, ptr %tmp1, align 1
 ; VF_8-LABEL:  Checking a loop in 'i8_factor_2'
-; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1
 ; VF_16-LABEL: Checking a loop in 'i8_factor_2'
-; VF_16:         Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_16:         Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 1
-  %tmp2 = load i8, i8* %tmp0, align 1
-  %tmp3 = load i8, i8* %tmp1, align 1
-  store i8 0, i8* %tmp0, align 1
-  store i8 0, i8* %tmp1, align 1
+  %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load i8, ptr %tmp0, align 1
+  %tmp3 = load i8, ptr %tmp1, align 1
+  store i8 0, ptr %tmp0, align 1
+  store i8 0, ptr %tmp1, align 1
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -51,38 +51,38 @@ for.end:
 }
 
 %i16.2 = type {i16, i16}
-define void @i16_factor_2(%i16.2* %data, i64 %n) #0 {
+define void @i16_factor_2(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i16_factor_2'
-; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store i16 0, ptr %tmp1, align 2
 ; VF_4-LABEL:  Checking a loop in 'i16_factor_2'
-; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2
 ; VF_8-LABEL:  Checking a loop in 'i16_factor_2'
-; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2
 ; VF_16-LABEL: Checking a loop in 'i16_factor_2'
-; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 1
-  %tmp2 = load i16, i16* %tmp0, align 2
-  %tmp3 = load i16, i16* %tmp1, align 2
-  store i16 0, i16* %tmp0, align 2
-  store i16 0, i16* %tmp1, align 2
+  %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load i16, ptr %tmp0, align 2
+  %tmp3 = load i16, ptr %tmp1, align 2
+  store i16 0, ptr %tmp0, align 2
+  store i16 0, ptr %tmp1, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -92,38 +92,38 @@ for.end:
 }
 
 %i32.2 = type {i32, i32}
-define void @i32_factor_2(%i32.2* %data, i64 %n) #0 {
+define void @i32_factor_2(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i32_factor_2'
-; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4
 ; VF_4-LABEL:  Checking a loop in 'i32_factor_2'
-; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4
 ; VF_8-LABEL:  Checking a loop in 'i32_factor_2'
-; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4
 ; VF_16-LABEL: Checking a loop in 'i32_factor_2'
-; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 1
-  %tmp2 = load i32, i32* %tmp0, align 4
-  %tmp3 = load i32, i32* %tmp1, align 4
-  store i32 0, i32* %tmp0, align 4
-  store i32 0, i32* %tmp1, align 4
+  %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load i32, ptr %tmp0, align 4
+  %tmp3 = load i32, ptr %tmp1, align 4
+  store i32 0, ptr %tmp0, align 4
+  store i32 0, ptr %tmp1, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -133,38 +133,38 @@ for.end:
 }
 
 %i64.2 = type {i64, i64}
-define void @i64_factor_2(%i64.2* %data, i64 %n) #0 {
+define void @i64_factor_2(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i64_factor_2'
-; VF_2:          Found an estimated cost of 44 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_2:          Found an estimated cost of 44 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8
 ; VF_4-LABEL:  Checking a loop in 'i64_factor_2'
-; VF_4:          Found an estimated cost of 88 for VF 4 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_4:          Found an estimated cost of 88 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i64 0, ptr %tmp1, align 8
 ; VF_8-LABEL:  Checking a loop in 'i64_factor_2'
-; VF_8:          Found an estimated cost of 176 for VF 8 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_8:          Found an estimated cost of 176 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i64 0, ptr %tmp1, align 8
 ; VF_16-LABEL: Checking a loop in 'i64_factor_2'
-; VF_16:         Found an estimated cost of 352 for VF 16 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_16:         Found an estimated cost of 352 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i64 0, ptr %tmp1, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 1
-  %tmp2 = load i64, i64* %tmp0, align 8
-  %tmp3 = load i64, i64* %tmp1, align 8
-  store i64 0, i64* %tmp0, align 8
-  store i64 0, i64* %tmp1, align 8
+  %tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load i64, ptr %tmp0, align 8
+  %tmp3 = load i64, ptr %tmp1, align 8
+  store i64 0, ptr %tmp0, align 8
+  store i64 0, ptr %tmp1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -174,38 +174,38 @@ for.end:
 }
 
 %f16.2 = type {half, half}
-define void @f16_factor_2(%f16.2* %data, i64 %n) #0 {
+define void @f16_factor_2(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'f16_factor_2'
-; VF_2:          Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load half, half* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load half, half* %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2
+; VF_2:          Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load half, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load half, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store half 0xH0000, ptr %tmp1, align 2
 ; VF_4-LABEL:  Checking a loop in 'f16_factor_2'
-; VF_4:          Found an estimated cost of 18 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 16 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
+; VF_4:          Found an estimated cost of 18 for VF 4 For instruction: %tmp2 = load half, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 16 for VF 4 For instruction: store half 0xH0000, ptr %tmp1, align 2
 ; VF_8-LABEL:  Checking a loop in 'f16_factor_2'
-; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
+; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load half, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store half 0xH0000, ptr %tmp1, align 2
 ; VF_16-LABEL: Checking a loop in 'f16_factor_2'
-; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load half, half* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load half, half* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2
+; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load half, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load half, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store half 0xH0000, ptr %tmp1, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %f16.2, %f16.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %f16.2, %f16.2* %data, i64 %i, i32 1
-  %tmp2 = load half, half* %tmp0, align 2
-  %tmp3 = load half, half* %tmp1, align 2
-  store half 0.0, half* %tmp0, align 2
-  store half 0.0, half* %tmp1, align 2
+  %tmp0 = getelementptr inbounds %f16.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %f16.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load half, ptr %tmp0, align 2
+  %tmp3 = load half, ptr %tmp1, align 2
+  store half 0.0, ptr %tmp0, align 2
+  store half 0.0, ptr %tmp1, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -215,38 +215,38 @@ for.end:
 }
 
 %f32.2 = type {float, float}
-define void @f32_factor_2(%f32.2* %data, i64 %n) #0 {
+define void @f32_factor_2(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'f32_factor_2'
-; VF_2:          Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load float, float* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load float, float* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4
+; VF_2:          Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load float, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load float, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
 ; VF_4-LABEL:  Checking a loop in 'f32_factor_2'
-; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load float, float* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load float, float* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
+; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load float, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load float, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
 ; VF_8-LABEL:  Checking a loop in 'f32_factor_2'
-; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load float, float* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load float, float* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4
+; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load float, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load float, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
 ; VF_16-LABEL: Checking a loop in 'f32_factor_2'
-; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load float, float* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load float, float* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4
+; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load float, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load float, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %f32.2, %f32.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %f32.2, %f32.2* %data, i64 %i, i32 1
-  %tmp2 = load float, float* %tmp0, align 4
-  %tmp3 = load float, float* %tmp1, align 4
-  store float 0.0, float* %tmp0, align 4
-  store float 0.0, float* %tmp1, align 4
+  %tmp0 = getelementptr inbounds %f32.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %f32.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load float, ptr %tmp0, align 4
+  %tmp3 = load float, ptr %tmp1, align 4
+  store float 0.0, ptr %tmp0, align 4
+  store float 0.0, ptr %tmp1, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -256,38 +256,38 @@ for.end:
 }
 
 %f64.2 = type {double, double}
-define void @f64_factor_2(%f64.2* %data, i64 %n) #0 {
+define void @f64_factor_2(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'f64_factor_2'
-; VF_2:          Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load double, double* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load double, double* %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8
+; VF_2:          Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load double, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load double, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
 ; VF_4-LABEL:  Checking a loop in 'f64_factor_2'
-; VF_4:          Found an estimated cost of 24 for VF 4 For instruction: %tmp2 = load double, double* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load double, double* %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 16 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8
+; VF_4:          Found an estimated cost of 24 for VF 4 For instruction: %tmp2 = load double, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load double, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 16 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
 ; VF_8-LABEL:  Checking a loop in 'f64_factor_2'
-; VF_8:          Found an estimated cost of 48 for VF 8 For instruction: %tmp2 = load double, double* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load double, double* %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 32 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8
+; VF_8:          Found an estimated cost of 48 for VF 8 For instruction: %tmp2 = load double, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load double, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 32 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
 ; VF_16-LABEL: Checking a loop in 'f64_factor_2'
-; VF_16:         Found an estimated cost of 96 for VF 16 For instruction: %tmp2 = load double, double* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load double, double* %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 64 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8
+; VF_16:         Found an estimated cost of 96 for VF 16 For instruction: %tmp2 = load double, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load double, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 64 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %f64.2, %f64.2* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %f64.2, %f64.2* %data, i64 %i, i32 1
-  %tmp2 = load double, double* %tmp0, align 8
-  %tmp3 = load double, double* %tmp1, align 8
-  store double 0.0, double* %tmp0, align 8
-  store double 0.0, double* %tmp1, align 8
+  %tmp0 = getelementptr inbounds %f64.2, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %f64.2, ptr %data, i64 %i, i32 1
+  %tmp2 = load double, ptr %tmp0, align 8
+  %tmp3 = load double, ptr %tmp1, align 8
+  store double 0.0, ptr %tmp0, align 8
+  store double 0.0, ptr %tmp1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -301,49 +301,49 @@ for.end:
 ; Factor 3
 
 %i8.3 = type {i8, i8, i8}
-define void @i8_factor_3(%i8.3* %data, i64 %n) #0 {
+define void @i8_factor_3(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i8_factor_3'
-; VF_2:          Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i8, i8* %tmp0, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i8, i8* %tmp1, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, i8* %tmp2, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i8 0, i8* %tmp2, align 1
+; VF_2:          Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i8 0, ptr %tmp2, align 1
 ; VF_4-LABEL:  Checking a loop in 'i8_factor_3'
-; VF_4:          Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp0, align 1
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i8, i8* %tmp1, align 1
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, i8* %tmp2, align 1
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i8 0, i8* %tmp2, align 1
+; VF_4:          Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i8 0, ptr %tmp2, align 1
 ; VF_8-LABEL:  Checking a loop in 'i8_factor_3'
-; VF_8:          Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp2, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1
+; VF_8:          Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i8 0, ptr %tmp2, align 1
 ; VF_16-LABEL: Checking a loop in 'i8_factor_3'
-; VF_16:         Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp2, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1
+; VF_16:         Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i8 0, ptr %tmp2, align 1
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 2
-  %tmp3 = load i8, i8* %tmp0, align 1
-  %tmp4 = load i8, i8* %tmp1, align 1
-  %tmp5 = load i8, i8* %tmp2, align 1
-  store i8 0, i8* %tmp0, align 1
-  store i8 0, i8* %tmp1, align 1
-  store i8 0, i8* %tmp2, align 1
+  %tmp0 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 2
+  %tmp3 = load i8, ptr %tmp0, align 1
+  %tmp4 = load i8, ptr %tmp1, align 1
+  %tmp5 = load i8, ptr %tmp2, align 1
+  store i8 0, ptr %tmp0, align 1
+  store i8 0, ptr %tmp1, align 1
+  store i8 0, ptr %tmp2, align 1
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -353,49 +353,49 @@ for.end:
 }
 
 %i16.3 = type {i16, i16, i16}
-define void @i16_factor_3(%i16.3* %data, i64 %n) #0 {
+define void @i16_factor_3(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i16_factor_3'
-; VF_2:          Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i16, i16* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i16, i16* %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, i16* %tmp2, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i16 0, i16* %tmp2, align 2
+; VF_2:          Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i16 0, ptr %tmp2, align 2
 ; VF_4-LABEL:  Checking a loop in 'i16_factor_3'
-; VF_4:          Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp2, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2
+; VF_4:          Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i16 0, ptr %tmp2, align 2
 ; VF_8-LABEL:  Checking a loop in 'i16_factor_3'
-; VF_8:          Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp2, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2
+; VF_8:          Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i16 0, ptr %tmp2, align 2
 ; VF_16-LABEL: Checking a loop in 'i16_factor_3'
-; VF_16:         Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp2, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2
+; VF_16:         Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i16 0, ptr %tmp2, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 2
-  %tmp3 = load i16, i16* %tmp0, align 2
-  %tmp4 = load i16, i16* %tmp1, align 2
-  %tmp5 = load i16, i16* %tmp2, align 2
-  store i16 0, i16* %tmp0, align 2
-  store i16 0, i16* %tmp1, align 2
-  store i16 0, i16* %tmp2, align 2
+  %tmp0 = getelementptr inbounds %i16.3, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i16.3, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %i16.3, ptr %data, i64 %i, i32 2
+  %tmp3 = load i16, ptr %tmp0, align 2
+  %tmp4 = load i16, ptr %tmp1, align 2
+  %tmp5 = load i16, ptr %tmp2, align 2
+  store i16 0, ptr %tmp0, align 2
+  store i16 0, ptr %tmp1, align 2
+  store i16 0, ptr %tmp2, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -405,49 +405,49 @@ for.end:
 }
 
 %i32.3 = type {i32, i32, i32}
-define void @i32_factor_3(%i32.3* %data, i64 %n) #0 {
+define void @i32_factor_3(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i32_factor_3'
-; VF_2:          Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4
+; VF_2:          Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i32 0, ptr %tmp2, align 4
 ; VF_4-LABEL:  Checking a loop in 'i32_factor_3'
-; VF_4:          Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4
+; VF_4:          Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i32 0, ptr %tmp2, align 4
 ; VF_8-LABEL:  Checking a loop in 'i32_factor_3'
-; VF_8:          Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4
+; VF_8:          Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i32 0, ptr %tmp2, align 4
 ; VF_16-LABEL: Checking a loop in 'i32_factor_3'
-; VF_16:         Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4
+; VF_16:         Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i32 0, ptr %tmp2, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 2
-  %tmp3 = load i32, i32* %tmp0, align 4
-  %tmp4 = load i32, i32* %tmp1, align 4
-  %tmp5 = load i32, i32* %tmp2, align 4
-  store i32 0, i32* %tmp0, align 4
-  store i32 0, i32* %tmp1, align 4
-  store i32 0, i32* %tmp2, align 4
+  %tmp0 = getelementptr inbounds %i32.3, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i32.3, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %i32.3, ptr %data, i64 %i, i32 2
+  %tmp3 = load i32, ptr %tmp0, align 4
+  %tmp4 = load i32, ptr %tmp1, align 4
+  %tmp5 = load i32, ptr %tmp2, align 4
+  store i32 0, ptr %tmp0, align 4
+  store i32 0, ptr %tmp1, align 4
+  store i32 0, ptr %tmp2, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -457,49 +457,49 @@ for.end:
 }
 
 %i64.3 = type {i64, i64, i64}
-define void @i64_factor_3(%i64.3* %data, i64 %n) #0 {
+define void @i64_factor_3(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i64_factor_3'
-; VF_2:          Found an estimated cost of 66 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i64, i64* %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, i64* %tmp2, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store i64 0, i64* %tmp2, align 8
+; VF_2:          Found an estimated cost of 66 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store i64 0, ptr %tmp2, align 8
 ; VF_4-LABEL:  Checking a loop in 'i64_factor_3'
-; VF_4:          Found an estimated cost of 132 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i64, i64* %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, i64* %tmp2, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 36 for VF 4 For instruction: store i64 0, i64* %tmp2, align 8
+; VF_4:          Found an estimated cost of 132 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 36 for VF 4 For instruction: store i64 0, ptr %tmp2, align 8
 ; VF_8-LABEL:  Checking a loop in 'i64_factor_3'
-; VF_8:          Found an estimated cost of 264 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i64, i64* %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, i64* %tmp2, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 72 for VF 8 For instruction: store i64 0, i64* %tmp2, align 8
+; VF_8:          Found an estimated cost of 264 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 72 for VF 8 For instruction: store i64 0, ptr %tmp2, align 8
 ; VF_16-LABEL: Checking a loop in 'i64_factor_3'
-; VF_16:         Found an estimated cost of 528 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i64, i64* %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, i64* %tmp2, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 144 for VF 16 For instruction: store i64 0, i64* %tmp2, align 8
+; VF_16:         Found an estimated cost of 528 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 144 for VF 16 For instruction: store i64 0, ptr %tmp2, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i64.3, %i64.3* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i64.3, %i64.3* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %i64.3, %i64.3* %data, i64 %i, i32 2
-  %tmp3 = load i64, i64* %tmp0, align 8
-  %tmp4 = load i64, i64* %tmp1, align 8
-  %tmp5 = load i64, i64* %tmp2, align 8
-  store i64 0, i64* %tmp0, align 8
-  store i64 0, i64* %tmp1, align 8
-  store i64 0, i64* %tmp2, align 8
+  %tmp0 = getelementptr inbounds %i64.3, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i64.3, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %i64.3, ptr %data, i64 %i, i32 2
+  %tmp3 = load i64, ptr %tmp0, align 8
+  %tmp4 = load i64, ptr %tmp1, align 8
+  %tmp5 = load i64, ptr %tmp2, align 8
+  store i64 0, ptr %tmp0, align 8
+  store i64 0, ptr %tmp1, align 8
+  store i64 0, ptr %tmp2, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -509,49 +509,49 @@ for.end:
 }
 
 %f16.3 = type {half, half, half}
-define void @f16_factor_3(%f16.3* %data, i64 %n) #0 {
+define void @f16_factor_3(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'f16_factor_3'
-; VF_2:          Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load half, half* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load half, half* %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, half* %tmp2, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store half 0xH0000, half* %tmp2, align 2
+; VF_2:          Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load half, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load half, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, ptr %tmp2, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store half 0xH0000, ptr %tmp2, align 2
 ; VF_4-LABEL:  Checking a loop in 'f16_factor_3'
-; VF_4:          Found an estimated cost of 28 for VF 4 For instruction: %tmp3 = load half, half* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, half* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, half* %tmp2, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2
+; VF_4:          Found an estimated cost of 28 for VF 4 For instruction: %tmp3 = load half, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, ptr %tmp2, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store half 0xH0000, ptr %tmp2, align 2
 ; VF_8-LABEL:  Checking a loop in 'f16_factor_3'
-; VF_8:          Found an estimated cost of 56 for VF 8 For instruction: %tmp3 = load half, half* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load half, half* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, half* %tmp2, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2
+; VF_8:          Found an estimated cost of 56 for VF 8 For instruction: %tmp3 = load half, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load half, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, ptr %tmp2, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store half 0xH0000, ptr %tmp2, align 2
 ; VF_16-LABEL: Checking a loop in 'f16_factor_3'
-; VF_16:         Found an estimated cost of 112 for VF 16 For instruction: %tmp3 = load half, half* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load half, half* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, half* %tmp2, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store half 0xH0000, half* %tmp2, align 2
+; VF_16:         Found an estimated cost of 112 for VF 16 For instruction: %tmp3 = load half, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load half, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, ptr %tmp2, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store half 0xH0000, ptr %tmp2, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %f16.3, %f16.3* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %f16.3, %f16.3* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %f16.3, %f16.3* %data, i64 %i, i32 2
-  %tmp3 = load half, half* %tmp0, align 2
-  %tmp4 = load half, half* %tmp1, align 2
-  %tmp5 = load half, half* %tmp2, align 2
-  store half 0.0, half* %tmp0, align 2
-  store half 0.0, half* %tmp1, align 2
-  store half 0.0, half* %tmp2, align 2
+  %tmp0 = getelementptr inbounds %f16.3, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %f16.3, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %f16.3, ptr %data, i64 %i, i32 2
+  %tmp3 = load half, ptr %tmp0, align 2
+  %tmp4 = load half, ptr %tmp1, align 2
+  %tmp5 = load half, ptr %tmp2, align 2
+  store half 0.0, ptr %tmp0, align 2
+  store half 0.0, ptr %tmp1, align 2
+  store half 0.0, ptr %tmp2, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -561,49 +561,49 @@ for.end:
 }
 
 %f32.3 = type {float, float, float}
-define void @f32_factor_3(%f32.3* %data, i64 %n) #0 {
+define void @f32_factor_3(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'f32_factor_3'
-; VF_2:          Found an estimated cost of 16 for VF 2 For instruction: %tmp3 = load float, float* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load float, float* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, float* %tmp2, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4
+; VF_2:          Found an estimated cost of 16 for VF 2 For instruction: %tmp3 = load float, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load float, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, ptr %tmp2, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
 ; VF_4-LABEL:  Checking a loop in 'f32_factor_3'
-; VF_4:          Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load float, float* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load float, float* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp2, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4
+; VF_4:          Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load float, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load float, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, ptr %tmp2, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
 ; VF_8-LABEL:  Checking a loop in 'f32_factor_3'
-; VF_8:          Found an estimated cost of 64 for VF 8 For instruction: %tmp3 = load float, float* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, float* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp2, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4
+; VF_8:          Found an estimated cost of 64 for VF 8 For instruction: %tmp3 = load float, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, ptr %tmp2, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
 ; VF_16-LABEL: Checking a loop in 'f32_factor_3'
-; VF_16:         Found an estimated cost of 128 for VF 16 For instruction: %tmp3 = load float, float* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load float, float* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, float* %tmp2, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store float 0.000000e+00, float* %tmp2, align 4
+; VF_16:         Found an estimated cost of 128 for VF 16 For instruction: %tmp3 = load float, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load float, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, ptr %tmp2, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %f32.3, %f32.3* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %f32.3, %f32.3* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %f32.3, %f32.3* %data, i64 %i, i32 2
-  %tmp3 = load float, float* %tmp0, align 4
-  %tmp4 = load float, float* %tmp1, align 4
-  %tmp5 = load float, float* %tmp2, align 4
-  store float 0.0, float* %tmp0, align 4
-  store float 0.0, float* %tmp1, align 4
-  store float 0.0, float* %tmp2, align 4
+  %tmp0 = getelementptr inbounds %f32.3, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %f32.3, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %f32.3, ptr %data, i64 %i, i32 2
+  %tmp3 = load float, ptr %tmp0, align 4
+  %tmp4 = load float, ptr %tmp1, align 4
+  %tmp5 = load float, ptr %tmp2, align 4
+  store float 0.0, ptr %tmp0, align 4
+  store float 0.0, ptr %tmp1, align 4
+  store float 0.0, ptr %tmp2, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -613,49 +613,49 @@ for.end:
 }
 
 %f64.3 = type {double, double, double}
-define void @f64_factor_3(%f64.3* %data, i64 %n) #0 {
+define void @f64_factor_3(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'f64_factor_3'
-; VF_2:          Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load double, double* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load double, double* %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, double* %tmp2, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store double 0.000000e+00, double* %tmp2, align 8
+; VF_2:          Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
 ; VF_4-LABEL:  Checking a loop in 'f64_factor_3'
-; VF_4:          Found an estimated cost of 36 for VF 4 For instruction: %tmp3 = load double, double* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load double, double* %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, double* %tmp2, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store double 0.000000e+00, double* %tmp2, align 8
+; VF_4:          Found an estimated cost of 36 for VF 4 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
 ; VF_8-LABEL:  Checking a loop in 'f64_factor_3'
-; VF_8:          Found an estimated cost of 72 for VF 8 For instruction: %tmp3 = load double, double* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load double, double* %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, double* %tmp2, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store double 0.000000e+00, double* %tmp2, align 8
+; VF_8:          Found an estimated cost of 72 for VF 8 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
 ; VF_16-LABEL: Checking a loop in 'f64_factor_3'
-; VF_16:         Found an estimated cost of 144 for VF 16 For instruction: %tmp3 = load double, double* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load double, double* %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, double* %tmp2, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store double 0.000000e+00, double* %tmp2, align 8
+; VF_16:         Found an estimated cost of 144 for VF 16 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %f64.3, %f64.3* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %f64.3, %f64.3* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %f64.3, %f64.3* %data, i64 %i, i32 2
-  %tmp3 = load double, double* %tmp0, align 8
-  %tmp4 = load double, double* %tmp1, align 8
-  %tmp5 = load double, double* %tmp2, align 8
-  store double 0.0, double* %tmp0, align 8
-  store double 0.0, double* %tmp1, align 8
-  store double 0.0, double* %tmp2, align 8
+  %tmp0 = getelementptr inbounds %f64.3, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %f64.3, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %f64.3, ptr %data, i64 %i, i32 2
+  %tmp3 = load double, ptr %tmp0, align 8
+  %tmp4 = load double, ptr %tmp1, align 8
+  %tmp5 = load double, ptr %tmp2, align 8
+  store double 0.0, ptr %tmp0, align 8
+  store double 0.0, ptr %tmp1, align 8
+  store double 0.0, ptr %tmp2, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -668,60 +668,60 @@ for.end:
 ; Factor 4
 
 %i8.4 = type {i8, i8, i8, i8}
-define void @i8_factor_4(%i8.4* %data, i64 %n) #0 {
+define void @i8_factor_4(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i8_factor_4'
-; VF_2:          Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp2, align 1
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store i8 0, i8* %tmp3, align 1
+; VF_2:          Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp2, align 1
+; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store i8 0, ptr %tmp3, align 1
 ; VF_4-LABEL: Checking a loop in 'i8_factor_4'
-; VF_4:         Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp2, align 1
-; VF_4-NEXT:    Found an estimated cost of 32 for VF 4 For instruction: store i8 0, i8* %tmp3, align 1
+; VF_4:         Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp2, align 1
+; VF_4-NEXT:    Found an estimated cost of 32 for VF 4 For instruction: store i8 0, ptr %tmp3, align 1
 ; VF_8-LABEL:  Checking a loop in 'i8_factor_4'
-; VF_8:          Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store i8 0, i8* %tmp3, align 1
+; VF_8:          Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp2, align 1
+; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store i8 0, ptr %tmp3, align 1
 ; VF_16-LABEL: Checking a loop in 'i8_factor_4'
-; VF_16:         Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store i8 0, i8* %tmp3, align 1
+; VF_16:         Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp2, align 1
+; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store i8 0, ptr %tmp3, align 1
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 2
-  %tmp3 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 3
-  %tmp4 = load i8, i8* %tmp0, align 1
-  %tmp5 = load i8, i8* %tmp1, align 1
-  %tmp6 = load i8, i8* %tmp2, align 1
-  %tmp7 = load i8, i8* %tmp3, align 1
-  store i8 0, i8* %tmp0, align 1
-  store i8 0, i8* %tmp1, align 1
-  store i8 0, i8* %tmp2, align 1
-  store i8 0, i8* %tmp3, align 1
+  %tmp0 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 2
+  %tmp3 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 3
+  %tmp4 = load i8, ptr %tmp0, align 1
+  %tmp5 = load i8, ptr %tmp1, align 1
+  %tmp6 = load i8, ptr %tmp2, align 1
+  %tmp7 = load i8, ptr %tmp3, align 1
+  store i8 0, ptr %tmp0, align 1
+  store i8 0, ptr %tmp1, align 1
+  store i8 0, ptr %tmp2, align 1
+  store i8 0, ptr %tmp3, align 1
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -731,60 +731,60 @@ for.end:
 }
 
 %i16.4 = type {i16, i16, i16, i16}
-define void @i16_factor_4(%i16.4* %data, i64 %n) #0 {
+define void @i16_factor_4(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i16_factor_4'
-; VF_2:          Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp2, align 2
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store i16 0, i16* %tmp3, align 2
+; VF_2:          Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp2, align 2
+; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store i16 0, ptr %tmp3, align 2
 ; VF_4-LABEL:  Checking a loop in 'i16_factor_4'
-; VF_4:          Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2
-; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store i16 0, i16* %tmp3, align 2
+; VF_4:          Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp2, align 2
+; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store i16 0, ptr %tmp3, align 2
 ; VF_8-LABEL:  Checking a loop in 'i16_factor_4'
-; VF_8:          Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store i16 0, i16* %tmp3, align 2
+; VF_8:          Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp2, align 2
+; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store i16 0, ptr %tmp3, align 2
 ; VF_16-LABEL: Checking a loop in 'i16_factor_4'
-; VF_16:         Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store i16 0, i16* %tmp3, align 2
+; VF_16:         Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp2, align 2
+; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store i16 0, ptr %tmp3, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 2
-  %tmp3 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 3
-  %tmp4 = load i16, i16* %tmp0, align 2
-  %tmp5 = load i16, i16* %tmp1, align 2
-  %tmp6 = load i16, i16* %tmp2, align 2
-  %tmp7 = load i16, i16* %tmp3, align 2
-  store i16 0, i16* %tmp0, align 2
-  store i16 0, i16* %tmp1, align 2
-  store i16 0, i16* %tmp2, align 2
-  store i16 0, i16* %tmp3, align 2
+  %tmp0 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 2
+  %tmp3 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 3
+  %tmp4 = load i16, ptr %tmp0, align 2
+  %tmp5 = load i16, ptr %tmp1, align 2
+  %tmp6 = load i16, ptr %tmp2, align 2
+  %tmp7 = load i16, ptr %tmp3, align 2
+  store i16 0, ptr %tmp0, align 2
+  store i16 0, ptr %tmp1, align 2
+  store i16 0, ptr %tmp2, align 2
+  store i16 0, ptr %tmp3, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -794,60 +794,60 @@ for.end:
 }
 
 %i32.4 = type {i32, i32, i32, i32}
-define void @i32_factor_4(%i32.4* %data, i64 %n) #0 {
+define void @i32_factor_4(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i32_factor_4'
-; VF_2:          Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store i32 0, i32* %tmp3, align 4
+; VF_2:          Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp2, align 4
+; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store i32 0, ptr %tmp3, align 4
 ; VF_4-LABEL:  Checking a loop in 'i32_factor_4'
-; VF_4:          Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4
-; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store i32 0, i32* %tmp3, align 4
+; VF_4:          Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp2, align 4
+; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store i32 0, ptr %tmp3, align 4
 ; VF_8-LABEL:  Checking a loop in 'i32_factor_4'
-; VF_8:          Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store i32 0, i32* %tmp3, align 4
+; VF_8:          Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp2, align 4
+; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store i32 0, ptr %tmp3, align 4
 ; VF_16-LABEL: Checking a loop in 'i32_factor_4'
-; VF_16:         Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store i32 0, i32* %tmp3, align 4
+; VF_16:         Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp2, align 4
+; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store i32 0, ptr %tmp3, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 2
-  %tmp3 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 3
-  %tmp4 = load i32, i32* %tmp0, align 4
-  %tmp5 = load i32, i32* %tmp1, align 4
-  %tmp6 = load i32, i32* %tmp2, align 4
-  %tmp7 = load i32, i32* %tmp3, align 4
-  store i32 0, i32* %tmp0, align 4
-  store i32 0, i32* %tmp1, align 4
-  store i32 0, i32* %tmp2, align 4
-  store i32 0, i32* %tmp3, align 4
+  %tmp0 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 2
+  %tmp3 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 3
+  %tmp4 = load i32, ptr %tmp0, align 4
+  %tmp5 = load i32, ptr %tmp1, align 4
+  %tmp6 = load i32, ptr %tmp2, align 4
+  %tmp7 = load i32, ptr %tmp3, align 4
+  store i32 0, ptr %tmp0, align 4
+  store i32 0, ptr %tmp1, align 4
+  store i32 0, ptr %tmp2, align 4
+  store i32 0, ptr %tmp3, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -857,60 +857,60 @@ for.end:
 }
 
 %i64.4 = type {i64, i64, i64, i64}
-define void @i64_factor_4(%i64.4* %data, i64 %n) #0 {
+define void @i64_factor_4(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'i64_factor_4'
-; VF_2:          Found an estimated cost of 88 for VF 2 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp2, align 8
-; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store i64 0, i64* %tmp3, align 8
+; VF_2:          Found an estimated cost of 88 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp2, align 8
+; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store i64 0, ptr %tmp3, align 8
 ; VF_4-LABEL:  Checking a loop in 'i64_factor_4'
-; VF_4:          Found an estimated cost of 176 for VF 4 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp2, align 8
-; VF_4-NEXT:     Found an estimated cost of 48 for VF 4 For instruction: store i64 0, i64* %tmp3, align 8
+; VF_4:          Found an estimated cost of 176 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp2, align 8
+; VF_4-NEXT:     Found an estimated cost of 48 for VF 4 For instruction: store i64 0, ptr %tmp3, align 8
 ; VF_8-LABEL:  Checking a loop in 'i64_factor_4'
-; VF_8:          Found an estimated cost of 352 for VF 8 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp2, align 8
-; VF_8-NEXT:     Found an estimated cost of 96 for VF 8 For instruction: store i64 0, i64* %tmp3, align 8
+; VF_8:          Found an estimated cost of 352 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp2, align 8
+; VF_8-NEXT:     Found an estimated cost of 96 for VF 8 For instruction: store i64 0, ptr %tmp3, align 8
 ; VF_16-LABEL: Checking a loop in 'i64_factor_4'
-; VF_16:         Found an estimated cost of 704 for VF 16 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp2, align 8
-; VF_16-NEXT:    Found an estimated cost of 192 for VF 16 For instruction: store i64 0, i64* %tmp3, align 8
+; VF_16:         Found an estimated cost of 704 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp2, align 8
+; VF_16-NEXT:    Found an estimated cost of 192 for VF 16 For instruction: store i64 0, ptr %tmp3, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 2
-  %tmp3 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 3
-  %tmp4 = load i64, i64* %tmp0, align 8
-  %tmp5 = load i64, i64* %tmp1, align 8
-  %tmp6 = load i64, i64* %tmp2, align 8
-  %tmp7 = load i64, i64* %tmp3, align 8
-  store i64 0, i64* %tmp0, align 8
-  store i64 0, i64* %tmp1, align 8
-  store i64 0, i64* %tmp2, align 8
-  store i64 0, i64* %tmp3, align 8
+  %tmp0 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 2
+  %tmp3 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 3
+  %tmp4 = load i64, ptr %tmp0, align 8
+  %tmp5 = load i64, ptr %tmp1, align 8
+  %tmp6 = load i64, ptr %tmp2, align 8
+  %tmp7 = load i64, ptr %tmp3, align 8
+  store i64 0, ptr %tmp0, align 8
+  store i64 0, ptr %tmp1, align 8
+  store i64 0, ptr %tmp2, align 8
+  store i64 0, ptr %tmp3, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -920,60 +920,60 @@ for.end:
 }
 
 %f16.4 = type {half, half, half, half}
-define void @f16_factor_4(%f16.4* %data, i64 %n) #0 {
+define void @f16_factor_4(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'f16_factor_4'
-; VF_2:          Found an estimated cost of 18 for VF 2 For instruction: %tmp4 = load half, half* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, half* %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load half, half* %tmp2, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load half, half* %tmp3, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp2, align 2
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store half 0xH0000, half* %tmp3, align 2
+; VF_2:          Found an estimated cost of 18 for VF 2 For instruction: %tmp4 = load half, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load half, ptr %tmp2, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load half, ptr %tmp3, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp2, align 2
+; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store half 0xH0000, ptr %tmp3, align 2
 ; VF_4-LABEL:  Checking a loop in 'f16_factor_4'
-; VF_4:          Found an estimated cost of 36 for VF 4 For instruction: %tmp4 = load half, half* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, half* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load half, half* %tmp2, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load half, half* %tmp3, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2
-; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, half* %tmp3, align 2
+; VF_4:          Found an estimated cost of 36 for VF 4 For instruction: %tmp4 = load half, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load half, ptr %tmp2, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load half, ptr %tmp3, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp2, align 2
+; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, ptr %tmp3, align 2
 ; VF_8-LABEL:  Checking a loop in 'f16_factor_4'
-; VF_8:          Found an estimated cost of 72 for VF 8 For instruction: %tmp4 = load half, half* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, half* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load half, half* %tmp2, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load half, half* %tmp3, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, half* %tmp3, align 2
+; VF_8:          Found an estimated cost of 72 for VF 8 For instruction: %tmp4 = load half, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load half, ptr %tmp2, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load half, ptr %tmp3, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp2, align 2
+; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, ptr %tmp3, align 2
 ; VF_16-LABEL: Checking a loop in 'f16_factor_4'
-; VF_16:         Found an estimated cost of 144 for VF 16 For instruction: %tmp4 = load half, half* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, half* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load half, half* %tmp2, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load half, half* %tmp3, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp2, align 2
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store half 0xH0000, half* %tmp3, align 2
+; VF_16:         Found an estimated cost of 144 for VF 16 For instruction: %tmp4 = load half, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load half, ptr %tmp2, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load half, ptr %tmp3, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp2, align 2
+; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store half 0xH0000, ptr %tmp3, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 2
-  %tmp3 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 3
-  %tmp4 = load half, half* %tmp0, align 2
-  %tmp5 = load half, half* %tmp1, align 2
-  %tmp6 = load half, half* %tmp2, align 2
-  %tmp7 = load half, half* %tmp3, align 2
-  store half 0.0, half* %tmp0, align 2
-  store half 0.0, half* %tmp1, align 2
-  store half 0.0, half* %tmp2, align 2
-  store half 0.0, half* %tmp3, align 2
+  %tmp0 = getelementptr inbounds %f16.4, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %f16.4, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %f16.4, ptr %data, i64 %i, i32 2
+  %tmp3 = getelementptr inbounds %f16.4, ptr %data, i64 %i, i32 3
+  %tmp4 = load half, ptr %tmp0, align 2
+  %tmp5 = load half, ptr %tmp1, align 2
+  %tmp6 = load half, ptr %tmp2, align 2
+  %tmp7 = load half, ptr %tmp3, align 2
+  store half 0.0, ptr %tmp0, align 2
+  store half 0.0, ptr %tmp1, align 2
+  store half 0.0, ptr %tmp2, align 2
+  store half 0.0, ptr %tmp3, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -983,60 +983,60 @@ for.end:
 }
 
 %f32.4 = type {float, float, float, float}
-define void @f32_factor_4(%f32.4* %data, i64 %n) #0 {
+define void @f32_factor_4(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'f32_factor_4'
-; VF_2:          Found an estimated cost of 20 for VF 2 For instruction: %tmp4 = load float, float* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, float* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load float, float* %tmp2, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load float, float* %tmp3, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store float 0.000000e+00, float* %tmp3, align 4
+; VF_2:          Found an estimated cost of 20 for VF 2 For instruction: %tmp4 = load float, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load float, ptr %tmp2, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load float, ptr %tmp3, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
+; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp3, align 4
 ; VF_4-LABEL:  Checking a loop in 'f32_factor_4'
-; VF_4:          Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load float, float* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load float, float* %tmp2, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load float, float* %tmp3, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4
-; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store float 0.000000e+00, float* %tmp3, align 4
+; VF_4:          Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load float, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load float, ptr %tmp2, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load float, ptr %tmp3, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
+; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp3, align 4
 ; VF_8-LABEL:  Checking a loop in 'f32_factor_4'
-; VF_8:          Found an estimated cost of 80 for VF 8 For instruction: %tmp4 = load float, float* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load float, float* %tmp2, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load float, float* %tmp3, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store float 0.000000e+00, float* %tmp3, align 4
+; VF_8:          Found an estimated cost of 80 for VF 8 For instruction: %tmp4 = load float, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load float, ptr %tmp2, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load float, ptr %tmp3, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
+; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp3, align 4
 ; VF_16-LABEL: Checking a loop in 'f32_factor_4'
-; VF_16:         Found an estimated cost of 160 for VF 16 For instruction: %tmp4 = load float, float* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, float* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load float, float* %tmp2, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load float, float* %tmp3, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp2, align 4
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store float 0.000000e+00, float* %tmp3, align 4
+; VF_16:         Found an estimated cost of 160 for VF 16 For instruction: %tmp4 = load float, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load float, ptr %tmp2, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load float, ptr %tmp3, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
+; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp3, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 2
-  %tmp3 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 3
-  %tmp4 = load float, float* %tmp0, align 4
-  %tmp5 = load float, float* %tmp1, align 4
-  %tmp6 = load float, float* %tmp2, align 4
-  %tmp7 = load float, float* %tmp3, align 4
-  store float 0.0, float* %tmp0, align 4
-  store float 0.0, float* %tmp1, align 4
-  store float 0.0, float* %tmp2, align 4
-  store float 0.0, float* %tmp3, align 4
+  %tmp0 = getelementptr inbounds %f32.4, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %f32.4, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %f32.4, ptr %data, i64 %i, i32 2
+  %tmp3 = getelementptr inbounds %f32.4, ptr %data, i64 %i, i32 3
+  %tmp4 = load float, ptr %tmp0, align 4
+  %tmp5 = load float, ptr %tmp1, align 4
+  %tmp6 = load float, ptr %tmp2, align 4
+  %tmp7 = load float, ptr %tmp3, align 4
+  store float 0.0, ptr %tmp0, align 4
+  store float 0.0, ptr %tmp1, align 4
+  store float 0.0, ptr %tmp2, align 4
+  store float 0.0, ptr %tmp3, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -1046,60 +1046,60 @@ for.end:
 }
 
 %f64.4 = type {double, double, double, double}
-define void @f64_factor_4(%f64.4* %data, i64 %n) #0 {
+define void @f64_factor_4(ptr %data, i64 %n) #0 {
 entry:
   br label %for.body
 
 ; VF_2-LABEL:  Checking a loop in 'f64_factor_4'
-; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp4 = load double, double* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, double* %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load double, double* %tmp2, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load double, double* %tmp3, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp2, align 8
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store double 0.000000e+00, double* %tmp3, align 8
+; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
+; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp3, align 8
 ; VF_4-LABEL:  Checking a loop in 'f64_factor_4'
-; VF_4:          Found an estimated cost of 48 for VF 4 For instruction: %tmp4 = load double, double* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, double* %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load double, double* %tmp2, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load double, double* %tmp3, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp2, align 8
-; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store double 0.000000e+00, double* %tmp3, align 8
+; VF_4:          Found an estimated cost of 48 for VF 4 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
+; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp3, align 8
 ; VF_8-LABEL:  Checking a loop in 'f64_factor_4'
-; VF_8:          Found an estimated cost of 96 for VF 8 For instruction: %tmp4 = load double, double* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, double* %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load double, double* %tmp2, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load double, double* %tmp3, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp2, align 8
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store double 0.000000e+00, double* %tmp3, align 8
+; VF_8:          Found an estimated cost of 96 for VF 8 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
+; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp3, align 8
 ; VF_16-LABEL: Checking a loop in 'f64_factor_4'
-; VF_16:         Found an estimated cost of 192 for VF 16 For instruction: %tmp4 = load double, double* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, double* %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load double, double* %tmp2, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load double, double* %tmp3, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp2, align 8
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store double 0.000000e+00, double* %tmp3, align 8
+; VF_16:         Found an estimated cost of 192 for VF 16 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
+; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp3, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 0
-  %tmp1 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 1
-  %tmp2 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 2
-  %tmp3 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 3
-  %tmp4 = load double, double* %tmp0, align 8
-  %tmp5 = load double, double* %tmp1, align 8
-  %tmp6 = load double, double* %tmp2, align 8
-  %tmp7 = load double, double* %tmp3, align 8
-  store double 0.0, double* %tmp0, align 8
-  store double 0.0, double* %tmp1, align 8
-  store double 0.0, double* %tmp2, align 8
-  store double 0.0, double* %tmp3, align 8
+  %tmp0 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 1
+  %tmp2 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 2
+  %tmp3 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 3
+  %tmp4 = load double, ptr %tmp0, align 8
+  %tmp5 = load double, ptr %tmp1, align 8
+  %tmp6 = load double, ptr %tmp2, align 8
+  %tmp7 = load double, ptr %tmp3, align 8
+  store double 0.0, ptr %tmp0, align 8
+  store double 0.0, ptr %tmp1, align 8
+  store double 0.0, ptr %tmp2, align 8
+  store double 0.0, ptr %tmp3, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll
index fe02be793f2b6..d4bb5872b84c9 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll
@@ -4,8 +4,8 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-none-eabi"
 
 ; CHECK-LABEL: test_i32_align4
-; CHECK: call void @llvm.masked.store.v4i32.p0v4i32
-define void @test_i32_align4(i32* nocapture %A, i32 %n) #0 {
+; CHECK: call void @llvm.masked.store.v4i32.p0
+define void @test_i32_align4(ptr nocapture %A, i32 %n) #0 {
 entry:
   %cmp12 = icmp sgt i32 %n, 0
   br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
@@ -15,14 +15,14 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.inc
   %i.013 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.013
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.013
+  %0 = load i32, ptr %arrayidx, align 4
   %.off = add i32 %0, 9
   %1 = icmp ult i32 %.off, 19
   br i1 %1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  store i32 0, i32* %arrayidx, align 4
+  store i32 0, ptr %arrayidx, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -39,7 +39,7 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 
 ; CHECK-LABEL: test_i32_align2
 ; CHECK-NOT: call void @llvm.masked.store
-define void @test_i32_align2(i32* nocapture %A, i32 %n) #0 {
+define void @test_i32_align2(ptr nocapture %A, i32 %n) #0 {
 entry:
   %cmp12 = icmp sgt i32 %n, 0
   br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
@@ -49,14 +49,14 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.inc
   %i.013 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.013
-  %0 = load i32, i32* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.013
+  %0 = load i32, ptr %arrayidx, align 2
   %.off = add i32 %0, 9
   %1 = icmp ult i32 %.off, 19
   br i1 %1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  store i32 0, i32* %arrayidx, align 2
+  store i32 0, ptr %arrayidx, align 2
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -72,8 +72,8 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 }
 
 ; CHECK-LABEL: test_i32_noalign
-; CHECK: call void @llvm.masked.store.v4i32.p0v4i32
-define void @test_i32_noalign(i32* nocapture %A, i32 %n) #0 {
+; CHECK: call void @llvm.masked.store.v4i32.p0
+define void @test_i32_noalign(ptr nocapture %A, i32 %n) #0 {
 entry:
   %cmp12 = icmp sgt i32 %n, 0
   br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
@@ -83,14 +83,14 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.inc
   %i.013 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.013
-  %0 = load i32, i32* %arrayidx
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.013
+  %0 = load i32, ptr %arrayidx
   %.off = add i32 %0, 9
   %1 = icmp ult i32 %.off, 19
   br i1 %1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  store i32 0, i32* %arrayidx
+  store i32 0, ptr %arrayidx
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -106,8 +106,8 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 }
 
 ; CHECK-LABEL: test_i16_align2
-; CHECK: call void @llvm.masked.store.v8i16.p0v8i16
-define void @test_i16_align2(i16* nocapture %A, i32 %n) #0 {
+; CHECK: call void @llvm.masked.store.v8i16.p0
+define void @test_i16_align2(ptr nocapture %A, i32 %n) #0 {
 entry:
   %cmp12 = icmp sgt i32 %n, 0
   br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
@@ -117,14 +117,14 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.inc
   %i.013 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i16, i16* %A, i32 %i.013
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %A, i32 %i.013
+  %0 = load i16, ptr %arrayidx, align 2
   %.off = add i16 %0, 9
   %1 = icmp ult i16 %.off, 19
   br i1 %1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  store i16 0, i16* %arrayidx, align 2
+  store i16 0, ptr %arrayidx, align 2
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -141,7 +141,7 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 
 ; CHECK-LABEL: test_i16_align1
 ; CHECK-NOT: call void @llvm.masked.store
-define void @test_i16_align1(i16* nocapture %A, i32 %n) #0 {
+define void @test_i16_align1(ptr nocapture %A, i32 %n) #0 {
 entry:
   %cmp12 = icmp sgt i32 %n, 0
   br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
@@ -151,14 +151,14 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.inc
   %i.013 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i16, i16* %A, i32 %i.013
-  %0 = load i16, i16* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i16, ptr %A, i32 %i.013
+  %0 = load i16, ptr %arrayidx, align 1
   %.off = add i16 %0, 9
   %1 = icmp ult i16 %.off, 19
   br i1 %1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  store i16 0, i16* %arrayidx, align 1
+  store i16 0, ptr %arrayidx, align 1
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-predstorecost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-predstorecost.ll
index d4f17602ae243..3c3f8de065d13 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-predstorecost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-predstorecost.ll
@@ -8,7 +8,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 ; should never choose scalar load/store, and the cost of gather/scatter may be
 ; high enough to make vectorization unwarranted.
 
-define i32 @nested(float* nocapture %pG, float* nocapture readonly %pA, i32 %n, i32 %ii) #0 {
+define i32 @nested(ptr nocapture %pG, ptr nocapture readonly %pA, i32 %n, i32 %ii) #0 {
 ; CHECK-LABEL: @nested(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP66:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -28,23 +28,23 @@ define i32 @nested(float* nocapture %pG, float* nocapture readonly %pA, i32 %n,
 ; CHECK-NEXT:    [[J_065_US:%.*]] = phi i32 [ [[INC26_US:%.*]], [[FOR_COND8_FOR_COND_CLEANUP10_CRIT_EDGE_US:%.*]] ], [ [[I_067]], [[FOR_BODY4_US_PREHEADER]] ]
 ; CHECK-NEXT:    [[MUL_US:%.*]] = mul nsw i32 [[J_065_US]], [[N]]
 ; CHECK-NEXT:    [[ADD_US:%.*]] = add nsw i32 [[MUL_US]], [[I_067]]
-; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds float, float* [[PA:%.*]], i32 [[ADD_US]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX_US]], align 4
-; CHECK-NEXT:    [[ARRAYIDX7_US:%.*]] = getelementptr inbounds float, float* [[PG:%.*]], i32 [[ADD_US]]
-; CHECK-NEXT:    store float [[TMP0]], float* [[ARRAYIDX7_US]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds float, ptr [[PA:%.*]], i32 [[ADD_US]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_US]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7_US:%.*]] = getelementptr inbounds float, ptr [[PG:%.*]], i32 [[ADD_US]]
+; CHECK-NEXT:    store float [[TMP0]], ptr [[ARRAYIDX7_US]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY11_US:%.*]]
 ; CHECK:       for.body11.us:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ [[TMP0]], [[FOR_BODY4_US]] ], [ [[SUB_US:%.*]], [[FOR_BODY11_US]] ]
 ; CHECK-NEXT:    [[K_063_US:%.*]] = phi i32 [ 0, [[FOR_BODY4_US]] ], [ [[INC_US:%.*]], [[FOR_BODY11_US]] ]
 ; CHECK-NEXT:    [[ADD16_US:%.*]] = add nsw i32 [[K_063_US]], [[MUL15]]
-; CHECK-NEXT:    [[ARRAYIDX17_US:%.*]] = getelementptr inbounds float, float* [[PG]], i32 [[ADD16_US]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX17_US]], align 4
+; CHECK-NEXT:    [[ARRAYIDX17_US:%.*]] = getelementptr inbounds float, ptr [[PG]], i32 [[ADD16_US]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX17_US]], align 4
 ; CHECK-NEXT:    [[ADD19_US:%.*]] = add nsw i32 [[K_063_US]], [[MUL_US]]
-; CHECK-NEXT:    [[ARRAYIDX20_US:%.*]] = getelementptr inbounds float, float* [[PG]], i32 [[ADD19_US]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[ARRAYIDX20_US]], align 4
+; CHECK-NEXT:    [[ARRAYIDX20_US:%.*]] = getelementptr inbounds float, ptr [[PG]], i32 [[ADD19_US]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[ARRAYIDX20_US]], align 4
 ; CHECK-NEXT:    [[MUL21_US:%.*]] = fmul fast float [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[SUB_US]] = fsub fast float [[TMP1]], [[MUL21_US]]
-; CHECK-NEXT:    store float [[SUB_US]], float* [[ARRAYIDX7_US]], align 4
+; CHECK-NEXT:    store float [[SUB_US]], ptr [[ARRAYIDX7_US]], align 4
 ; CHECK-NEXT:    [[INC_US]] = add nuw nsw i32 [[K_063_US]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_US]], [[I_067]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND8_FOR_COND_CLEANUP10_CRIT_EDGE_US]], label [[FOR_BODY11_US]]
@@ -67,10 +67,10 @@ define i32 @nested(float* nocapture %pG, float* nocapture readonly %pA, i32 %n,
 ; CHECK:       for.body4:
 ; CHECK-NEXT:    [[J_065:%.*]] = phi i32 [ [[INC26:%.*]], [[FOR_BODY4]] ], [ 0, [[FOR_BODY4_PREHEADER]] ]
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[J_065]], [[N]]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[PA]], i32 [[MUL]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[PG]], i32 [[MUL]]
-; CHECK-NEXT:    store float [[TMP4]], float* [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[PA]], i32 [[MUL]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[PG]], i32 [[MUL]]
+; CHECK-NEXT:    store float [[TMP4]], ptr [[ARRAYIDX7]], align 4
 ; CHECK-NEXT:    [[INC26]] = add nuw nsw i32 [[J_065]], 1
 ; CHECK-NEXT:    [[EXITCOND72_NOT:%.*]] = icmp eq i32 [[INC26]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND72_NOT]], label [[FOR_COND_CLEANUP3_LOOPEXIT:%.*]], label [[FOR_BODY4]]
@@ -89,24 +89,24 @@ for.body4.us:                                     ; preds = %for.body4.lr.ph, %f
   %j.065.us = phi i32 [ %inc26.us, %for.cond8.for.cond.cleanup10_crit_edge.us ], [ %i.067, %for.body4.lr.ph ]
   %mul.us = mul nsw i32 %j.065.us, %n
   %add.us = add nsw i32 %mul.us, %i.067
-  %arrayidx.us = getelementptr inbounds float, float* %pA, i32 %add.us
-  %0 = load float, float* %arrayidx.us, align 4
-  %arrayidx7.us = getelementptr inbounds float, float* %pG, i32 %add.us
-  store float %0, float* %arrayidx7.us, align 4
+  %arrayidx.us = getelementptr inbounds float, ptr %pA, i32 %add.us
+  %0 = load float, ptr %arrayidx.us, align 4
+  %arrayidx7.us = getelementptr inbounds float, ptr %pG, i32 %add.us
+  store float %0, ptr %arrayidx7.us, align 4
   br label %for.body11.us
 
 for.body11.us:                                    ; preds = %for.body4.us, %for.body11.us
   %1 = phi float [ %0, %for.body4.us ], [ %sub.us, %for.body11.us ]
   %k.063.us = phi i32 [ 0, %for.body4.us ], [ %inc.us, %for.body11.us ]
   %add16.us = add nsw i32 %k.063.us, %mul15
-  %arrayidx17.us = getelementptr inbounds float, float* %pG, i32 %add16.us
-  %2 = load float, float* %arrayidx17.us, align 4
+  %arrayidx17.us = getelementptr inbounds float, ptr %pG, i32 %add16.us
+  %2 = load float, ptr %arrayidx17.us, align 4
   %add19.us = add nsw i32 %k.063.us, %mul.us
-  %arrayidx20.us = getelementptr inbounds float, float* %pG, i32 %add19.us
-  %3 = load float, float* %arrayidx20.us, align 4
+  %arrayidx20.us = getelementptr inbounds float, ptr %pG, i32 %add19.us
+  %3 = load float, ptr %arrayidx20.us, align 4
   %mul21.us = fmul fast float %3, %2
   %sub.us = fsub fast float %1, %mul21.us
-  store float %sub.us, float* %arrayidx7.us, align 4
+  store float %sub.us, ptr %arrayidx7.us, align 4
   %inc.us = add nuw nsw i32 %k.063.us, 1
   %exitcond.not = icmp eq i32 %inc.us, %i.067
   br i1 %exitcond.not, label %for.cond8.for.cond.cleanup10_crit_edge.us, label %for.body11.us
@@ -127,10 +127,10 @@ for.cond.cleanup3:                                ; preds = %for.cond8.for.cond.
 for.body4:                                        ; preds = %for.body4.lr.ph, %for.body4
   %j.065 = phi i32 [ %inc26, %for.body4 ], [ 0, %for.body4.lr.ph ]
   %mul = mul nsw i32 %j.065, %n
-  %arrayidx = getelementptr inbounds float, float* %pA, i32 %mul
-  %4 = load float, float* %arrayidx, align 4
-  %arrayidx7 = getelementptr inbounds float, float* %pG, i32 %mul
-  store float %4, float* %arrayidx7, align 4
+  %arrayidx = getelementptr inbounds float, ptr %pA, i32 %mul
+  %4 = load float, ptr %arrayidx, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %pG, i32 %mul
+  store float %4, ptr %arrayidx7, align 4
   %inc26 = add nuw nsw i32 %j.065, 1
   %exitcond72.not = icmp eq i32 %inc26, %n
   br i1 %exitcond72.not, label %for.cond.cleanup3, label %for.body4

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
index f8247aa7486b9..eee95015c121e 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-none-none-eabi"
 
-define i32 @reduction_sum_single(i32* noalias nocapture %A) {
+define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_sum_single(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -14,9 +14,8 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD]])
 ; CHECK-NEXT:    [[TMP3]] = add i32 [[TMP2]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
@@ -38,8 +37,8 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
   %l7 = add i32 %sum.02, %l3
   %indvars.iv.next = add i32 %indvars.iv, 1
   %exitcond = icmp eq i32 %indvars.iv.next, 257
@@ -50,7 +49,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_sum(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -61,12 +60,10 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[VEC_IND]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], [[VEC_PHI]]
@@ -94,10 +91,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l7 = add i32 %sum.02, %indvars.iv
   %l8 = add i32 %l7, %l3
   %l9 = add i32 %l8, %l5
@@ -110,7 +107,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_prod(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -120,12 +117,10 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], [[WIDE_MASKED_LOAD1]]
 ; CHECK-NEXT:    [[TMP6]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP5]], <4 x i32> [[VEC_PHI]]
@@ -149,10 +144,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l8 = mul i32 %prod.02, %l3
   %l9 = mul i32 %l8, %l5
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -164,7 +159,7 @@ entry:
   ret i32 %prod.0.lcssa
 }
 
-define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_and(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -174,12 +169,10 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
 ; CHECK-NEXT:    [[TMP6]] = and <4 x i32> [[VEC_PHI]], [[TMP5]]
@@ -203,10 +196,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = and i32 %result.08, %l0
   %and = and i32 %add, %l1
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -218,7 +211,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_or(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -228,12 +221,10 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6]] = or <4 x i32> [[VEC_PHI]], [[TMP5]]
@@ -257,10 +248,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %l1, %l0
   %or = or i32 %add, %result.08
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -272,7 +263,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_xor(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -282,12 +273,10 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6]] = xor <4 x i32> [[VEC_PHI]], [[TMP5]]
@@ -311,10 +300,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %l1, %l0
   %xor = xor i32 %add, %result.08
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -326,7 +315,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
+define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_fadd(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -336,12 +325,10 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast <4 x float> [[TMP4]], [[WIDE_MASKED_LOAD1]]
 ; CHECK-NEXT:    [[TMP6]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP5]], <4 x float> [[VEC_PHI]]
@@ -365,10 +352,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv
-  %l0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv
-  %l1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i32 %indvars.iv
+  %l0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i32 %indvars.iv
+  %l1 = load float, ptr %arrayidx2, align 4
   %add = fadd fast float %result.08, %l0
   %fadd = fadd fast float %add, %l1
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -380,7 +367,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret float %result.0.lcssa
 }
 
-define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
+define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_fmul(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -390,12 +377,10 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], [[WIDE_MASKED_LOAD1]]
 ; CHECK-NEXT:    [[TMP6]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP5]], <4 x float> [[VEC_PHI]]
@@ -419,10 +404,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv
-  %l0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv
-  %l1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i32 %indvars.iv
+  %l0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i32 %indvars.iv
+  %l1 = load float, ptr %arrayidx2, align 4
   %add = fmul fast float %result.08, %l0
   %fmul = fmul fast float %add, %l1
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -434,7 +419,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret float %result.0.lcssa
 }
 
-define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_min(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -443,9 +428,8 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
@@ -460,8 +444,8 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[RESULT_08:%.*]] = phi i32 [ [[TMP5:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
-; CHECK-NEXT:    [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDVARS_IV]]
+; CHECK-NEXT:    [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TMP5]] = call i32 @llvm.smin.i32(i32 [[RESULT_08]], i32 [[L0]])
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257
@@ -476,8 +460,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
   %c0 = icmp slt i32 %result.08, %l0
   %v0 = select i1 %c0, i32 %result.08, i32 %l0
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -489,7 +473,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_max(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -498,9 +482,8 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
@@ -515,8 +498,8 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[RESULT_08:%.*]] = phi i32 [ [[TMP5:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
-; CHECK-NEXT:    [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDVARS_IV]]
+; CHECK-NEXT:    [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TMP5]] = call i32 @llvm.umax.i32(i32 [[RESULT_08]], i32 [[L0]])
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257
@@ -531,8 +514,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
   %c0 = icmp ugt i32 %result.08, %l0
   %v0 = select i1 %c0, i32 %result.08, i32 %l0
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -544,15 +527,15 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define float @reduction_fmax(float* nocapture %A, float* nocapture %B) {
+define float @reduction_fmax(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_fmax(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[RESULT_08:%.*]] = phi float [ [[V0:%.*]], [[FOR_BODY]] ], [ 1.000000e+03, [[ENTRY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDVARS_IV]]
-; CHECK-NEXT:    [[L0:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 [[INDVARS_IV]]
+; CHECK-NEXT:    [[L0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[C0:%.*]] = fcmp ogt float [[RESULT_08]], [[L0]]
 ; CHECK-NEXT:    [[V0]] = select i1 [[C0]], float [[RESULT_08]], float [[L0]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
@@ -567,8 +550,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi float [ %v0, %for.body ], [ 1000.0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv
-  %l0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i32 %indvars.iv
+  %l0 = load float, ptr %arrayidx, align 4
   %c0 = fcmp ogt float %result.08, %l0
   %v0 = select i1 %c0, float %result.08, float %l0
   %indvars.iv.next = add i32 %indvars.iv, 1

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
index bb6bd351dcd53..33599abca3ec4 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-none-eabi"
 
 ; Should not be vectorized
-define i64 @add_i64_i64(i64* nocapture readonly %x, i32 %n) #0 {
+define i64 @add_i64_i64(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i64_i64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -13,8 +13,8 @@ define i64 @add_i64_i64(i64* nocapture readonly %x, i32 %n) #0 {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[X:%.*]], i32 [[I_08]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[X:%.*]], i32 [[I_08]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ADD]] = add nsw i64 [[TMP0]], [[R_07]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
@@ -30,8 +30,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.07 = phi i64 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %x, i32 %i.08
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %x, i32 %i.08
+  %0 = load i64, ptr %arrayidx, align 8
   %add = add nsw i64 %0, %r.07
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, %n
@@ -44,7 +44,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 ; 4x to use VADDLV
 ; FIXME: TailPredicate
-define i64 @add_i32_i64(i32* nocapture readonly %x, i32 %n) #0 {
+define i64 @add_i32_i64(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i32_i64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -58,9 +58,8 @@ define i64 @add_i32_i64(i32* nocapture readonly %x, i32 %n) #0 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP2]])
 ; CHECK-NEXT:    [[TMP4]] = add i64 [[TMP3]], [[VEC_PHI]]
@@ -77,8 +76,8 @@ define i64 @add_i32_i64(i32* nocapture readonly %x, i32 %n) #0 {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_08]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP6]] to i64
 ; CHECK-NEXT:    [[ADD]] = add nsw i64 [[R_07]], [[CONV]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
@@ -95,8 +94,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.07 = phi i64 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+  %0 = load i32, ptr %arrayidx, align 4
   %conv = sext i32 %0 to i64
   %add = add nsw i64 %r.07, %conv
   %inc = add nuw nsw i32 %i.08, 1
@@ -110,7 +109,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 ; 4x to use VADDLV
 ; FIXME: TailPredicate
-define i64 @add_i16_i64(i16* nocapture readonly %x, i32 %n) #0 {
+define i64 @add_i16_i64(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i16_i64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -124,9 +123,8 @@ define i64 @add_i16_i64(i16* nocapture readonly %x, i32 %n) #0 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP2]])
 ; CHECK-NEXT:    [[TMP4]] = add i64 [[TMP3]], [[VEC_PHI]]
@@ -143,8 +141,8 @@ define i64 @add_i16_i64(i16* nocapture readonly %x, i32 %n) #0 {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X]], i32 [[I_08]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP6]] to i64
 ; CHECK-NEXT:    [[ADD]] = add nsw i64 [[R_07]], [[CONV]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
@@ -161,8 +159,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.07 = phi i64 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.08
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %x, i32 %i.08
+  %0 = load i16, ptr %arrayidx, align 2
   %conv = sext i16 %0 to i64
   %add = add nsw i64 %r.07, %conv
   %inc = add nuw nsw i32 %i.08, 1
@@ -176,7 +174,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 ; 4x to use VADDLV
 ; FIXME: TailPredicate
-define i64 @add_i8_i64(i8* nocapture readonly %x, i32 %n) #0 {
+define i64 @add_i8_i64(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i8_i64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -190,9 +188,8 @@ define i64 @add_i8_i64(i8* nocapture readonly %x, i32 %n) #0 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP2]])
 ; CHECK-NEXT:    [[TMP4]] = add i64 [[TMP3]], [[VEC_PHI]]
@@ -209,8 +206,8 @@ define i64 @add_i8_i64(i8* nocapture readonly %x, i32 %n) #0 {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_08]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i32 [[I_08]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP6]] to i64
 ; CHECK-NEXT:    [[ADD]] = add nuw nsw i64 [[R_07]], [[CONV]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
@@ -227,8 +224,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.07 = phi i64 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %x, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i64
   %add = add nuw nsw i64 %r.07, %conv
   %inc = add nuw nsw i32 %i.08, 1
@@ -241,7 +238,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 4x to use VADDV.u32
-define i32 @add_i32_i32(i32* nocapture readonly %x, i32 %n) #0 {
+define i32 @add_i32_i32(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i32_i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -254,9 +251,8 @@ define i32 @add_i32_i32(i32* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD]])
 ; CHECK-NEXT:    [[TMP3]] = add i32 [[TMP2]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
@@ -273,8 +269,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.07 = phi i32 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add nsw i32 %0, %r.07
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, %n
@@ -286,7 +282,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 8x to use VADDV.u16
-define i32 @add_i16_i32(i16* nocapture readonly %x, i32 %n) #0 {
+define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i16_i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -299,9 +295,8 @@ define i32 @add_i16_i32(i16* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP1]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP0]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i16> [[WIDE_MASKED_LOAD]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i32> [[TMP2]], <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP3]])
@@ -320,8 +315,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.07 = phi i32 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.08
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %x, i32 %i.08
+  %0 = load i16, ptr %arrayidx, align 2
   %conv = sext i16 %0 to i32
   %add = add nsw i32 %r.07, %conv
   %inc = add nuw nsw i32 %i.08, 1
@@ -334,7 +329,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 16x to use VADDV.u16
-define i32 @add_i8_i32(i8* nocapture readonly %x, i32 %n) #0 {
+define i32 @add_i8_i32(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i8_i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -347,9 +342,8 @@ define i32 @add_i8_i32(i8* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP0]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <16 x i8> [[WIDE_MASKED_LOAD]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP2]], <16 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP3]])
@@ -368,8 +362,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.07 = phi i32 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %x, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nuw nsw i32 %r.07, %conv
   %inc = add nuw nsw i32 %i.08, 1
@@ -382,7 +376,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 8x to use VADDV.u16
-define signext i16 @add_i16_i16(i16* nocapture readonly %x, i32 %n) #0 {
+define signext i16 @add_i16_i16(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i16_i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -395,9 +389,8 @@ define signext i16 @add_i16_i16(i16* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP1]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> zeroinitializer)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP0]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> zeroinitializer)
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[WIDE_MASKED_LOAD]])
 ; CHECK-NEXT:    [[TMP3]] = add i16 [[TMP2]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
@@ -414,8 +407,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.09 = phi i16 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.010
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %x, i32 %i.010
+  %0 = load i16, ptr %arrayidx, align 2
   %add = add i16 %0, %r.09
   %inc = add nuw nsw i32 %i.010, 1
   %exitcond = icmp eq i32 %inc, %n
@@ -427,7 +420,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 16x to use VADDV.u8
-define signext i16 @add_i8_i16(i8* nocapture readonly %x, i32 %n) #0 {
+define signext i16 @add_i8_i16(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i8_i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -440,9 +433,8 @@ define signext i16 @add_i8_i16(i8* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP0]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <16 x i8> [[WIDE_MASKED_LOAD]] to <16 x i16>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i16> [[TMP2]], <16 x i16> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[TMP3]])
@@ -461,8 +453,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.09 = phi i16 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.010
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %x, i32 %i.010
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i16
   %add = add i16 %r.09, %conv
   %inc = add nuw nsw i32 %i.010, 1
@@ -475,7 +467,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 16x to use VADDV.u8
-define zeroext i8 @add_i8_i8(i8* nocapture readonly %x, i32 %n) #0 {
+define zeroext i8 @add_i8_i8(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-LABEL: @add_i8_i8(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -488,9 +480,8 @@ define zeroext i8 @add_i8_i8(i8* nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i8 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> zeroinitializer)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP0]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> zeroinitializer)
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[WIDE_MASKED_LOAD]])
 ; CHECK-NEXT:    [[TMP3]] = add i8 [[TMP2]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 16
@@ -507,8 +498,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.08 = phi i8 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.09
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %x, i32 %i.09
+  %0 = load i8, ptr %arrayidx, align 1
   %add = add i8 %0, %r.08
   %inc = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %inc, %n
@@ -520,7 +511,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; Not vectorized
-define i64 @mla_i64_i64(i64* nocapture readonly %x, i64* nocapture readonly %y, i32 %n) #0 {
+define i64 @mla_i64_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) #0 {
 ; CHECK-LABEL: @mla_i64_i64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -528,10 +519,10 @@ define i64 @mla_i64_i64(i64* nocapture readonly %x, i64* nocapture readonly %y,
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[R_09:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[X:%.*]], i32 [[I_010]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[Y:%.*]], i32 [[I_010]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[X:%.*]], i32 [[I_010]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[Y:%.*]], i32 [[I_010]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
 ; CHECK-NEXT:    [[ADD]] = add nsw i64 [[MUL]], [[R_09]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_010]], 1
@@ -548,10 +539,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.09 = phi i64 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %x, i32 %i.010
-  %0 = load i64, i64* %arrayidx, align 8
-  %arrayidx1 = getelementptr inbounds i64, i64* %y, i32 %i.010
-  %1 = load i64, i64* %arrayidx1, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %x, i32 %i.010
+  %0 = load i64, ptr %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %y, i32 %i.010
+  %1 = load i64, ptr %arrayidx1, align 8
   %mul = mul nsw i64 %1, %0
   %add = add nsw i64 %mul, %r.09
   %inc = add nuw nsw i32 %i.010, 1
@@ -565,7 +556,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 ; 4x to use VMLAL.u32
 ; FIXME: TailPredicate
-define i64 @mla_i32_i64(i32* nocapture readonly %x, i32* nocapture readonly %y, i32 %n) #0 {
+define i64 @mla_i32_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) #0 {
 ; CHECK-LABEL: @mla_i32_i64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -579,12 +570,10 @@ define i64 @mla_i32_i64(i32* nocapture readonly %x, i32* nocapture readonly %y,
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = sext <4 x i32> [[TMP4]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP5]])
@@ -602,10 +591,10 @@ define i64 @mla_i32_i64(i32* nocapture readonly %x, i32* nocapture readonly %y,
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_09:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_010]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[Y]], i32 [[I_010]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_010]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i32 [[I_010]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], [[TMP9]]
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[MUL]] to i64
 ; CHECK-NEXT:    [[ADD]] = add nsw i64 [[R_09]], [[CONV]]
@@ -623,10 +612,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.09 = phi i64 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.010
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %y, i32 %i.010
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.010
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %y, i32 %i.010
+  %1 = load i32, ptr %arrayidx1, align 4
   %mul = mul nsw i32 %1, %0
   %conv = sext i32 %mul to i64
   %add = add nsw i64 %r.09, %conv
@@ -641,7 +630,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 ; 8x to use VMLAL.u16
 ; FIXME: TailPredicate
-define i64 @mla_i16_i64(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) #0 {
+define i64 @mla_i16_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) #0 {
 ; CHECK-LABEL: @mla_i16_i64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -655,13 +644,11 @@ define i64 @mla_i16_i64(i16* nocapture readonly %x, i16* nocapture readonly %y,
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP0]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = sext <8 x i16> [[WIDE_LOAD1]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul nsw <8 x i32> [[TMP5]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sext <8 x i32> [[TMP6]] to <8 x i64>
@@ -680,11 +667,11 @@ define i64 @mla_i16_i64(i16* nocapture readonly %x, i16* nocapture readonly %y,
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X]], i32 [[I_012]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_012]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP11]] to i32
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[Y]], i32 [[I_012]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[Y]], i32 [[I_012]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
 ; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[TMP12]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]]
 ; CHECK-NEXT:    [[CONV3:%.*]] = sext i32 [[MUL]] to i64
@@ -703,11 +690,11 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.011 = phi i64 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.012
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %x, i32 %i.012
+  %0 = load i16, ptr %arrayidx, align 2
   %conv = sext i16 %0 to i32
-  %arrayidx1 = getelementptr inbounds i16, i16* %y, i32 %i.012
-  %1 = load i16, i16* %arrayidx1, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %y, i32 %i.012
+  %1 = load i16, ptr %arrayidx1, align 2
   %conv2 = sext i16 %1 to i32
   %mul = mul nsw i32 %conv2, %conv
   %conv3 = sext i32 %mul to i64
@@ -723,7 +710,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 ; 8x to use VMLAL.u16
 ; FIXME: TailPredicate
-define i64 @mla_i8_i64(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 {
+define i64 @mla_i8_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) #0 {
 ; CHECK-LABEL: @mla_i8_i64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -737,13 +724,11 @@ define i64 @mla_i8_i64(i8* nocapture readonly %x, i8* nocapture readonly %y, i32
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <8 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <8 x i8> [[WIDE_LOAD1]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw <8 x i32> [[TMP5]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext <8 x i32> [[TMP6]] to <8 x i64>
@@ -762,11 +747,11 @@ define i64 @mla_i8_i64(i8* nocapture readonly %x, i8* nocapture readonly %y, i32
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_012]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i32 [[I_012]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP11]] to i32
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[Y]], i32 [[I_012]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[Y]], i32 [[I_012]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
 ; CHECK-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP12]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV2]], [[CONV]]
 ; CHECK-NEXT:    [[CONV3:%.*]] = zext i32 [[MUL]] to i64
@@ -785,11 +770,11 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.011 = phi i64 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.012
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %x, i32 %i.012
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i32
-  %arrayidx1 = getelementptr inbounds i8, i8* %y, i32 %i.012
-  %1 = load i8, i8* %arrayidx1, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %y, i32 %i.012
+  %1 = load i8, ptr %arrayidx1, align 1
   %conv2 = zext i8 %1 to i32
   %mul = mul nuw nsw i32 %conv2, %conv
   %conv3 = zext i32 %mul to i64
@@ -804,7 +789,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 4x to use VMLA.i32
-define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y, i32 %n) #0 {
+define i32 @mla_i32_i32(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) #0 {
 ; CHECK-LABEL: @mla_i32_i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -817,12 +802,10 @@ define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y,
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
@@ -841,10 +824,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.09 = phi i32 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.010
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %y, i32 %i.010
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.010
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %y, i32 %i.010
+  %1 = load i32, ptr %arrayidx1, align 4
   %mul = mul nsw i32 %1, %0
   %add = add nsw i32 %mul, %r.09
   %inc = add nuw nsw i32 %i.010, 1
@@ -857,7 +840,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 8x to use VMLA.i16
-define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) #0 {
+define i32 @mla_i16_i32(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) #0 {
 ; CHECK-LABEL: @mla_i16_i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -870,13 +853,11 @@ define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y,
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP1]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP0]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i16> [[WIDE_MASKED_LOAD]] to <8 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP4]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP3]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
 ; CHECK-NEXT:    [[TMP5:%.*]] = sext <8 x i16> [[WIDE_MASKED_LOAD1]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul nsw <8 x i32> [[TMP5]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i32> [[TMP6]], <8 x i32> zeroinitializer
@@ -896,11 +877,11 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.010 = phi i32 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.011
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %x, i32 %i.011
+  %0 = load i16, ptr %arrayidx, align 2
   %conv = sext i16 %0 to i32
-  %arrayidx1 = getelementptr inbounds i16, i16* %y, i32 %i.011
-  %1 = load i16, i16* %arrayidx1, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %y, i32 %i.011
+  %1 = load i16, ptr %arrayidx1, align 2
   %conv2 = sext i16 %1 to i32
   %mul = mul nsw i32 %conv2, %conv
   %add = add nsw i32 %mul, %r.010
@@ -914,7 +895,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 16x to use VMLA.i8
-define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 {
+define i32 @mla_i8_i32(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) #0 {
 ; CHECK-LABEL: @mla_i8_i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -927,13 +908,11 @@ define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP0]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <16 x i8> [[WIDE_MASKED_LOAD]] to <16 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP4]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP3]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <16 x i8> [[WIDE_MASKED_LOAD1]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw <16 x i32> [[TMP5]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP6]], <16 x i32> zeroinitializer
@@ -953,11 +932,11 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.010 = phi i32 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.011
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %x, i32 %i.011
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i32
-  %arrayidx1 = getelementptr inbounds i8, i8* %y, i32 %i.011
-  %1 = load i8, i8* %arrayidx1, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %y, i32 %i.011
+  %1 = load i8, ptr %arrayidx1, align 1
   %conv2 = zext i8 %1 to i32
   %mul = mul nuw nsw i32 %conv2, %conv
   %add = add nuw nsw i32 %mul, %r.010
@@ -971,7 +950,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 8x to use VMLA.i16
-define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) #0 {
+define signext i16 @mla_i16_i16(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) #0 {
 ; CHECK-LABEL: @mla_i16_i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -984,12 +963,10 @@ define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture reado
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP1]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP3]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP0]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP2]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i16> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> [[TMP4]], <8 x i16> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP5]])
@@ -1008,10 +985,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.012 = phi i16 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.013
-  %0 = load i16, i16* %arrayidx, align 2
-  %arrayidx1 = getelementptr inbounds i16, i16* %y, i32 %i.013
-  %1 = load i16, i16* %arrayidx1, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %x, i32 %i.013
+  %0 = load i16, ptr %arrayidx, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %y, i32 %i.013
+  %1 = load i16, ptr %arrayidx1, align 2
   %mul = mul i16 %1, %0
   %add = add i16 %mul, %r.012
   %inc = add nuw nsw i32 %i.013, 1
@@ -1024,7 +1001,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 16x to use VMLA.i8
-define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 {
+define signext i16 @mla_i8_i16(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) #0 {
 ; CHECK-LABEL: @mla_i8_i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -1037,13 +1014,11 @@ define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP0]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <16 x i8> [[WIDE_MASKED_LOAD]] to <16 x i16>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP4]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP3]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <16 x i8> [[WIDE_MASKED_LOAD1]] to <16 x i16>
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw <16 x i16> [[TMP5]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i16> [[TMP6]], <16 x i16> zeroinitializer
@@ -1063,11 +1038,11 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.012 = phi i16 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.013
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %x, i32 %i.013
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i16
-  %arrayidx1 = getelementptr inbounds i8, i8* %y, i32 %i.013
-  %1 = load i8, i8* %arrayidx1, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %y, i32 %i.013
+  %1 = load i8, ptr %arrayidx1, align 1
   %conv2 = zext i8 %1 to i16
   %mul = mul nuw i16 %conv2, %conv
   %add = add i16 %mul, %r.012
@@ -1081,7 +1056,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 16x to use VMLA.i8
-define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 {
+define zeroext i8 @mla_i8_i8(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) #0 {
 ; CHECK-LABEL: @mla_i8_i8(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -1094,12 +1069,10 @@ define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly %
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i8 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP0]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP2]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <16 x i8> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> [[TMP4]], <16 x i8> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[TMP5]])
@@ -1118,10 +1091,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.011 = phi i8 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.012
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx1 = getelementptr inbounds i8, i8* %y, i32 %i.012
-  %1 = load i8, i8* %arrayidx1, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %x, i32 %i.012
+  %0 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %y, i32 %i.012
+  %1 = load i8, ptr %arrayidx1, align 1
   %mul = mul i8 %1, %0
   %add = add i8 %mul, %r.011
   %inc = add nuw nsw i32 %i.012, 1
@@ -1134,7 +1107,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 }
 
 ; 8x as different types
-define i32 @red_mla_ext_s8_s16_s32(i8* noalias nocapture readonly %A, i16* noalias nocapture readonly %B, i32 %n) #0 {
+define i32 @red_mla_ext_s8_s16_s32(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, i32 %n) #0 {
 ; CHECK-LABEL: @red_mla_ext_s8_s16_s32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP9_NOT:%.*]] = icmp eq i32 [[N:%.*]], 0
@@ -1147,13 +1120,11 @@ define i32 @red_mla_ext_s8_s16_s32(i8* noalias nocapture readonly %A, i16* noali
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP1]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i8> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[TMP0]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i8> poison)
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i8> [[WIDE_MASKED_LOAD]] to <8 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP4]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP3]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
 ; CHECK-NEXT:    [[TMP5:%.*]] = sext <8 x i16> [[WIDE_MASKED_LOAD1]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul nsw <8 x i32> [[TMP5]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i32> [[TMP6]], <8 x i32> zeroinitializer
@@ -1176,11 +1147,11 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.011 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %s.010 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i32 %i.011
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %i.011
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = sext i8 %0 to i32
-  %arrayidx1 = getelementptr inbounds i16, i16* %B, i32 %i.011
-  %1 = load i16, i16* %arrayidx1, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %B, i32 %i.011
+  %1 = load i16, ptr %arrayidx1, align 2
   %conv2 = sext i16 %1 to i32
   %mul = mul nsw i32 %conv2, %conv
   %add = add nsw i32 %mul, %s.010
@@ -1198,7 +1169,7 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 }
 
 ; 4x as different sext vs zext
-define i64 @red_mla_ext_s16_u16_s64(i16* noalias nocapture readonly %A, i16* noalias nocapture readonly %B, i32 %n) #0 {
+define i64 @red_mla_ext_s16_u16_s64(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, i32 %n) #0 {
 ; CHECK-LABEL: @red_mla_ext_s16_u16_s64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP9_NOT:%.*]] = icmp eq i32 [[N:%.*]], 0
@@ -1212,13 +1183,11 @@ define i64 @red_mla_ext_s16_u16_s64(i16* noalias nocapture readonly %A, i16* noa
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext <4 x i32> [[TMP6]] to <4 x i64>
@@ -1237,11 +1206,11 @@ define i64 @red_mla_ext_s16_u16_s64(i16* noalias nocapture readonly %A, i16* noa
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[S_010:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[A]], i32 [[I_011]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[I_011]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP11]] to i32
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 [[I_011]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[I_011]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
 ; CHECK-NEXT:    [[CONV2:%.*]] = zext i16 [[TMP12]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]]
 ; CHECK-NEXT:    [[MUL2:%.*]] = zext i32 [[MUL]] to i64
@@ -1263,11 +1232,11 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.011 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %s.010 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i16, i16* %A, i32 %i.011
-  %0 = load i16, i16* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i16, ptr %A, i32 %i.011
+  %0 = load i16, ptr %arrayidx, align 1
   %conv = sext i16 %0 to i32
-  %arrayidx1 = getelementptr inbounds i16, i16* %B, i32 %i.011
-  %1 = load i16, i16* %arrayidx1, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %B, i32 %i.011
+  %1 = load i16, ptr %arrayidx1, align 2
   %conv2 = zext i16 %1 to i32
   %mul = mul nsw i32 %conv2, %conv
   %mul2 = zext i32 %mul to i64
@@ -1286,7 +1255,7 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 }
 
 ; 4x as different sext vs zext
-define i32 @red_mla_u8_s8_u32(i8* noalias nocapture readonly %A, i8* noalias nocapture readonly %B, i32 %n) #0 {
+define i32 @red_mla_u8_s8_u32(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, i32 %n) #0 {
 ; CHECK-LABEL: @red_mla_u8_s8_u32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP9_NOT:%.*]] = icmp eq i32 [[N:%.*]], 0
@@ -1299,13 +1268,11 @@ define i32 @red_mla_u8_s8_u32(i8* noalias nocapture readonly %A, i8* noalias noc
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP1]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[TMP0]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[WIDE_MASKED_LOAD]] to <4 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <4 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP4]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[TMP3]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison)
 ; CHECK-NEXT:    [[TMP5:%.*]] = sext <4 x i8> [[WIDE_MASKED_LOAD1]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer
@@ -1328,11 +1295,11 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.011 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %s.010 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i32 %i.011
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %i.011
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i32
-  %arrayidx1 = getelementptr inbounds i8, i8* %B, i32 %i.011
-  %1 = load i8, i8* %arrayidx1, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %B, i32 %i.011
+  %1 = load i8, ptr %arrayidx1, align 1
   %conv2 = sext i8 %1 to i32
   %mul = mul nsw i32 %conv2, %conv
   %add = add i32 %mul, %s.010
@@ -1350,7 +1317,7 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 }
 
 ; Make sure interleave group members feeding in-loop reductions can be handled.
-define i32 @reduction_interleave_group(i32 %n, i32* %arr) #0 {
+define i32 @reduction_interleave_group(i32 %n, ptr %arr) #0 {
 ; CHECK-LABEL: @reduction_interleave_group(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[GUARD:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -1370,10 +1337,9 @@ define i32 @reduction_interleave_group(i32 %n, i32* %arr) #0 {
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[OFFSET_IDX]], 1
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 -1
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, i32* [[TMP4]], i32 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>*
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 -1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 [[TMP3]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[STRIDED_VEC1]])
@@ -1394,10 +1360,10 @@ define i32 @reduction_interleave_group(i32 %n, i32* %arr) #0 {
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[RED_PHI:%.*]] = phi i32 [ [[RED_2:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[ADD:%.*]] = or i32 [[IV]], 1
-; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i32 [[ADD]]
-; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[GEP_0]], align 4
-; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i32 [[IV]]
-; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i32 [[ADD]]
+; CHECK-NEXT:    [[L_0:%.*]] = load i32, ptr [[GEP_0]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i32 [[IV]]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[GEP_1]], align 4
 ; CHECK-NEXT:    [[RED_1:%.*]] = add i32 [[L_0]], [[RED_PHI]]
 ; CHECK-NEXT:    [[RED_2]] = add i32 [[RED_1]], [[L_1]]
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 2
@@ -1415,10 +1381,10 @@ for.body:                                         ; preds = %for.body.preheader,
   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
   %red.phi = phi i32 [ %red.2, %for.body ], [ 0, %entry ]
   %add = or i32 %iv, 1
-  %gep.0 = getelementptr inbounds i32, i32* %arr, i32 %add
-  %l.0 = load i32, i32* %gep.0, align 4
-  %gep.1 = getelementptr inbounds i32, i32* %arr, i32 %iv
-  %l.1 = load i32, i32* %gep.1, align 4
+  %gep.0 = getelementptr inbounds i32, ptr %arr, i32 %add
+  %l.0 = load i32, ptr %gep.0, align 4
+  %gep.1 = getelementptr inbounds i32, ptr %arr, i32 %iv
+  %l.1 = load i32, ptr %gep.1, align 4
   %red.1 = add i32 %l.0, %red.phi
   %red.2 = add i32 %red.1, %l.1
   %iv.next = add nuw nsw i32 %iv, 2
@@ -1431,7 +1397,7 @@ exit:
 }
 
 ; 16x to use VMLA.i8, same as mla_i8_i32 with multiple uses of the ext `add(mul(x, x))`
-define i32 @mla_i8_i32_multiuse(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 {
+define i32 @mla_i8_i32_multiuse(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) #0 {
 ; CHECK-LABEL: @mla_i8_i32_multiuse(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -1444,9 +1410,8 @@ define i32 @mla_i8_i32_multiuse(i8* nocapture readonly %x, i8* nocapture readonl
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP0]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <16 x i8> [[WIDE_MASKED_LOAD]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw nsw <16 x i32> [[TMP2]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP3]], <16 x i32> zeroinitializer
@@ -1466,8 +1431,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.010 = phi i32 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.011
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %x, i32 %i.011
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %mul = mul nuw nsw i32 %conv, %conv
   %add = add nuw nsw i32 %mul, %r.010

diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
index 2b806fb5e2089..a0ff7629b42c3 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
@@ -12,14 +12,14 @@ target triple = "thumbv8.1m.main-none-none-eabi"
 ; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction:   %l45 = and i32 %and515, 131072
 ; CHECK-NOT: vector.body
 
-define void @test([101 x i32] *%src, i32 %N) #0 {
+define void @test(ptr %src, i32 %N) #0 {
 entry:
   br label %for.body386
 
 for.body386:                                      ; preds = %entry, %l77
   %add387 = phi i32 [ %inc532, %l77 ], [ 0, %entry ]
-  %arrayidx388 = getelementptr inbounds [101 x i32], [101 x i32]* %src, i32 0, i32 %add387
-  %l41 = load i32, i32* %arrayidx388, align 4
+  %arrayidx388 = getelementptr inbounds [101 x i32], ptr %src, i32 0, i32 %add387
+  %l41 = load i32, ptr %arrayidx388, align 4
   %l42 = and i32 %l41, 65535
   %l43 = icmp eq i32 %l42, 0
   br i1 %l43, label %l77, label %l44
@@ -72,7 +72,7 @@ l44:                                               ; preds = %for.body386
   %and524 = shl i32 %l41, 1
   %l75 = and i32 %and524, 65536
   %l76 = or i32 %l75, %l74
-  store i32 %l76, i32* %arrayidx388, align 4
+  store i32 %l76, ptr %arrayidx388, align 4
   br label %l77
 
 l77:                                               ; preds = %for.body386, %l44

diff --git a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
index 6f5b7f50cc91f..ed6254074c8b5 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
@@ -52,19 +52,19 @@
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
-define void @prefer_folding(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) #0 {
+define void @prefer_folding(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) #0 {
 ; CHECK-LABEL:    prefer_folding(
 ; PREFER-FOLDING: vector.body:
 ; PREFER-FOLDING: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; PREFER-FOLDING: %[[VIVELEM0:.*]] = add i32 %index, 0
 ; PREFER-FOLDING: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %[[VIVELEM0]], i32 431)
-; PREFER-FOLDING: call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %active.lane.mask,
-; PREFER-FOLDING: call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %active.lane.mask,
-; PREFER-FOLDING: call void @llvm.masked.store.v4i32.p0v4i32({{.*}}, <4 x i1> %active.lane.mask
+; PREFER-FOLDING: call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %active.lane.mask,
+; PREFER-FOLDING: call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %active.lane.mask,
+; PREFER-FOLDING: call void @llvm.masked.store.v4i32.p0({{.*}}, <4 x i1> %active.lane.mask
 ; PREFER-FOLDING: br i1 %{{.*}}, label %{{.*}}, label %vector.body
 ;
-; NO-FOLDING-NOT: call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(
-; NO-FOLDING-NOT: call void @llvm.masked.store.v4i32.p0v4i32(
+; NO-FOLDING-NOT: call <4 x i32> @llvm.masked.load.v4i32.p0(
+; NO-FOLDING-NOT: call void @llvm.masked.store.v4i32.p0(
 ; NO-FOLDING:     br i1 %{{.*}}, label %{{.*}}, label %for.body
 entry:
   br label %for.body
@@ -74,27 +74,27 @@ for.cond.cleanup:
 
 for.body:
   %i.09 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.09
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %C, i32 %i.09
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.09
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %C, i32 %i.09
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %i.09
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %i.09
+  store i32 %add, ptr %arrayidx2, align 4
   %add3 = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %add3, 431
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-define void @mixed_types(i16* noalias nocapture %A, i16* noalias nocapture readonly %B, i16* noalias nocapture readonly %C, i32* noalias nocapture %D, i32* noalias nocapture readonly %E, i32* noalias nocapture readonly %F) #0 {
+define void @mixed_types(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture %D, ptr noalias nocapture readonly %E, ptr noalias nocapture readonly %F) #0 {
 ; CHECK-LABEL:        mixed_types(
 ; PREFER-FOLDING:     vector.body:
-; PREFER-FOLDING:     call <4 x i16> @llvm.masked.load.v4i16.p0v4i16
-; PREFER-FOLDING:     call <4 x i16> @llvm.masked.load.v4i16.p0v4i16
-; PREFER-FOLDING:     call void @llvm.masked.store.v4i16.p0v4i16
-; PREFER-FOLDING:     call <4 x i32> @llvm.masked.load.v4i32.p0v4i32
-; PREFER-FOLDING:     call <4 x i32> @llvm.masked.load.v4i32.p0v4i32
-; PREFER-FOLDING:     call void @llvm.masked.store.v4i32.p0v4i32
+; PREFER-FOLDING:     call <4 x i16> @llvm.masked.load.v4i16.p0
+; PREFER-FOLDING:     call <4 x i16> @llvm.masked.load.v4i16.p0
+; PREFER-FOLDING:     call void @llvm.masked.store.v4i16.p0
+; PREFER-FOLDING:     call <4 x i32> @llvm.masked.load.v4i32.p0
+; PREFER-FOLDING:     call <4 x i32> @llvm.masked.load.v4i32.p0
+; PREFER-FOLDING:     call void @llvm.masked.store.v4i32.p0
 ; PREFER-FOLDING:     br i1 %{{.*}}, label %{{.*}}, label %vector.body
 entry:
   br label %for.body
@@ -104,31 +104,31 @@ for.cond.cleanup:
 
 for.body:
   %i.018 = phi i32 [ 0, %entry ], [ %add9, %for.body ]
-  %arrayidx = getelementptr inbounds i16, i16* %B, i32 %i.018
-  %0 = load i16, i16* %arrayidx, align 2
-  %arrayidx1 = getelementptr inbounds i16, i16* %C, i32 %i.018
-  %1 = load i16, i16* %arrayidx1, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %B, i32 %i.018
+  %0 = load i16, ptr %arrayidx, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %C, i32 %i.018
+  %1 = load i16, ptr %arrayidx1, align 2
   %add = add i16 %1, %0
-  %arrayidx4 = getelementptr inbounds i16, i16* %A, i32 %i.018
-  store i16 %add, i16* %arrayidx4, align 2
-  %arrayidx5 = getelementptr inbounds i32, i32* %E, i32 %i.018
-  %2 = load i32, i32* %arrayidx5, align 4
-  %arrayidx6 = getelementptr inbounds i32, i32* %F, i32 %i.018
-  %3 = load i32, i32* %arrayidx6, align 4
+  %arrayidx4 = getelementptr inbounds i16, ptr %A, i32 %i.018
+  store i16 %add, ptr %arrayidx4, align 2
+  %arrayidx5 = getelementptr inbounds i32, ptr %E, i32 %i.018
+  %2 = load i32, ptr %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %F, i32 %i.018
+  %3 = load i32, ptr %arrayidx6, align 4
   %add7 = add nsw i32 %3, %2
-  %arrayidx8 = getelementptr inbounds i32, i32* %D, i32 %i.018
-  store i32 %add7, i32* %arrayidx8, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %D, i32 %i.018
+  store i32 %add7, ptr %arrayidx8, align 4
   %add9 = add nuw nsw i32 %i.018, 1
   %exitcond = icmp eq i32 %add9, 431
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-define void @zero_extending_load_allowed(i32* noalias nocapture %A, i8* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) #0 {
+define void @zero_extending_load_allowed(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) #0 {
 ; CHECK-LABEL:    zero_extending_load_allowed(
 ; PREFER-FOLDING: vector.body:
-; PREFER-FOLDING: call <4 x i8> @llvm.masked.load.v4i8.p0v4i8
-; PREFER-FOLDING: call <4 x i32> @llvm.masked.load.v4i32.p0v4i32
-; PREFER-FOLDING: call void @llvm.masked.store.v4i32.p0v4i32
+; PREFER-FOLDING: call <4 x i8> @llvm.masked.load.v4i8.p0
+; PREFER-FOLDING: call <4 x i32> @llvm.masked.load.v4i32.p0
+; PREFER-FOLDING: call void @llvm.masked.store.v4i32.p0
 ; PREFER-FOLDING: br i1 %{{.*}}, label %{{.*}}, label %vector.body
 entry:
   br label %for.body
@@ -138,25 +138,25 @@ for.cond.cleanup:
 
 for.body:
   %i.09 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %B, i32 %i.09
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %B, i32 %i.09
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i32
-  %arrayidx1 = getelementptr inbounds i32, i32* %C, i32 %i.09
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %C, i32 %i.09
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %conv
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %i.09
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %i.09
+  store i32 %add, ptr %arrayidx2, align 4
   %add3 = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %add3, 431
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-define void @sign_extending_load_allowed(i32* noalias nocapture %A, i8* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) #0 {
+define void @sign_extending_load_allowed(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) #0 {
 ; CHECK-LABEL:    sign_extending_load_allowed(
 ; PREFER-FOLDING: vector.body:
-; PREFER-FOLDING: call <4 x i8> @llvm.masked.load.v4i8.p0v4i8
-; PREFER-FOLDING: call <4 x i32> @llvm.masked.load.v4i32.p0v4i32
-; PREFER-FOLDING: call void @llvm.masked.store.v4i32.p0v4i32
+; PREFER-FOLDING: call <4 x i8> @llvm.masked.load.v4i8.p0
+; PREFER-FOLDING: call <4 x i32> @llvm.masked.load.v4i32.p0
+; PREFER-FOLDING: call void @llvm.masked.store.v4i32.p0
 ; PREFER-FOLDING: br i1 %{{.*}}, label %{{.*}}, label %vector.body
 entry:
   br label %for.body
@@ -166,22 +166,22 @@ for.cond.cleanup:
 
 for.body:
   %i.09 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %B, i32 %i.09
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %B, i32 %i.09
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = sext i8 %0 to i32
-  %arrayidx1 = getelementptr inbounds i32, i32* %C, i32 %i.09
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %C, i32 %i.09
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %conv
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %i.09
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %i.09
+  store i32 %add, ptr %arrayidx2, align 4
   %add3 = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %add3, 431
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-define void @narrowing_store_allowed(i8* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) #0 {
+define void @narrowing_store_allowed(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) #0 {
 ; CHECK-LABEL:    narrowing_store_allowed(
-; PREFER-FOLDING: call void @llvm.masked.store.v4i8.p0v4i8
+; PREFER-FOLDING: call void @llvm.masked.store.v4i8.p0
 ; PREFER-FOLDING: br i1 %{{.*}}, label %{{.*}}, label %vector.body
 entry:
   br label %for.body
@@ -191,14 +191,14 @@ for.cond.cleanup:
 
 for.body:
   %i.09 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.09
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %C, i32 %i.09
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.09
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %C, i32 %i.09
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %0
   %conv = trunc i32 %add to i8
-  %arrayidx2 = getelementptr inbounds i8, i8* %A, i32 %i.09
-  store i8 %conv, i8* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %A, i32 %i.09
+  store i8 %conv, ptr %arrayidx2, align 1
   %add3 = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %add3, 431
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -217,11 +217,11 @@ entry:
 
 for.body:
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp slt i32 %inc, 1000
   br i1 %exitcond, label %for.body, label %for.end
@@ -243,11 +243,11 @@ entry:
 
 for.body:
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x float], [32 x float]* @ftab, i32 0, i32 %i.08
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [32 x float], ptr @ftab, i32 0, i32 %i.08
+  %0 = load float, ptr %arrayidx, align 4
   %cmp1 = fcmp oeq float %0, 0.000000e+00
   %. = select i1 %cmp1, float 2.000000e+00, float 1.000000e+00
-  store float %., float* %arrayidx, align 4
+  store float %., ptr %arrayidx, align 4
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp slt i32 %inc, 999
   br i1 %exitcond, label %for.body, label %for.end
@@ -256,12 +256,12 @@ for.end:
   ret float 0.000000e+00
 }
 
-define void @pragma_vect_predicate_disable(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) #0 {
+define void @pragma_vect_predicate_disable(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) #0 {
 ; CHECK-LABEL:        pragma_vect_predicate_disable(
 ; PREFER-FOLDING:     vector.body:
-; PREFER-FOLDING-NOT: call <4 x i32> @llvm.masked.load.v4i32.p0v4i32
-; PREFER-FOLDING-NOT: call <4 x i32> @llvm.masked.load.v4i32.p0v4i32
-; PREFER-FOLDING-NOT: call void @llvm.masked.store.v4i32.p0v4i32
+; PREFER-FOLDING-NOT: call <4 x i32> @llvm.masked.load.v4i32.p0
+; PREFER-FOLDING-NOT: call <4 x i32> @llvm.masked.load.v4i32.p0
+; PREFER-FOLDING-NOT: call void @llvm.masked.store.v4i32.p0
 ; PREFER-FOLDING:     br i1 %{{.*}}, label %{{.*}}, label %vector.body
 entry:
   br label %for.body
@@ -271,19 +271,19 @@ for.cond.cleanup:
 
 for.body:
   %i.09 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.09
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %C, i32 %i.09
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.09
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %C, i32 %i.09
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %i.09
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %i.09
+  store i32 %add, ptr %arrayidx2, align 4
   %add3 = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %add3, 431
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !7
 }
 
-define void @stride_4(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) #0 {
+define void @stride_4(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) #0 {
 ; CHECK-LABEL:        stride_4(
 ; PREFER-FOLDING:     vector.body:
 ; PREFER-FOLDING-NOT: llvm.masked.load
@@ -297,24 +297,24 @@ for.cond.cleanup:
 
 for.body:
   %i.09 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.09
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %C, i32 %i.09
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.09
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %C, i32 %i.09
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %i.09
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %i.09
+  store i32 %add, ptr %arrayidx2, align 4
   %add3 = add nuw nsw i32 %i.09, 4
   %cmp = icmp ult i32 %add3, 731
   br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !5
 }
 
-define dso_local void @half(half* noalias nocapture %A, half* noalias nocapture readonly %B, half* noalias nocapture readonly %C) #0 {
+define dso_local void @half(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) #0 {
 ; CHECK-LABEL:    half(
 ; PREFER-FOLDING: vector.body:
-; PREFER-FOLDING: call <8 x half> @llvm.masked.load.v8f16.p0v8f16
-; PREFER-FOLDING: call <8 x half> @llvm.masked.load.v8f16.p0v8f16
-; PREFER-FOLDING: call void @llvm.masked.store.v8f16.p0v8f16
+; PREFER-FOLDING: call <8 x half> @llvm.masked.load.v8f16.p0
+; PREFER-FOLDING: call <8 x half> @llvm.masked.load.v8f16.p0
+; PREFER-FOLDING: call void @llvm.masked.store.v8f16.p0
 ; PREFER-FOLDING: br i1 %{{.*}}, label %{{.*}}, label %vector.body
 entry:
   br label %for.body
@@ -324,27 +324,27 @@ for.cond.cleanup:
 
 for.body:
   %i.09 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds half, half* %B, i32 %i.09
-  %0 = load half, half* %arrayidx, align 2
-  %arrayidx1 = getelementptr inbounds half, half* %C, i32 %i.09
-  %1 = load half, half* %arrayidx1, align 2
+  %arrayidx = getelementptr inbounds half, ptr %B, i32 %i.09
+  %0 = load half, ptr %arrayidx, align 2
+  %arrayidx1 = getelementptr inbounds half, ptr %C, i32 %i.09
+  %1 = load half, ptr %arrayidx1, align 2
   %add = fadd fast half %1, %0
-  %arrayidx2 = getelementptr inbounds half, half* %A, i32 %i.09
-  store half %add, half* %arrayidx2, align 2
+  %arrayidx2 = getelementptr inbounds half, ptr %A, i32 %i.09
+  store half %add, ptr %arrayidx2, align 2
   %add3 = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %add3, 431
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-define void @float(float* noalias nocapture %A, float* noalias nocapture readonly %B, float* noalias nocapture readonly %C) #0 {
+define void @float(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) #0 {
 ; CHECK-LABEL:    float(
 ; PREFER-FOLDING: vector.body:
 ; PREFER-FOLDING: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; PREFER-FOLDING: %[[VIVELEM0:.*]] = add i32 %index, 0
 ; PREFER-FOLDING: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %[[VIVELEM0]], i32 431)
-; PREFER-FOLDING: call <4 x float> @llvm.masked.load.v4f32.p0v4f32({{.*}}%active.lane.mask
-; PREFER-FOLDING: call <4 x float> @llvm.masked.load.v4f32.p0v4f32({{.*}}%active.lane.mask
-; PREFER-FOLDING: call void @llvm.masked.store.v4f32.p0v4f32({{.*}}%active.lane.mask
+; PREFER-FOLDING: call <4 x float> @llvm.masked.load.v4f32.p0({{.*}}%active.lane.mask
+; PREFER-FOLDING: call <4 x float> @llvm.masked.load.v4f32.p0({{.*}}%active.lane.mask
+; PREFER-FOLDING: call void @llvm.masked.store.v4f32.p0({{.*}}%active.lane.mask
 ; PREFER-FOLDING: %index.next = add i32 %index, 4
 ; PREFER-FOLDING: br i1 %{{.*}}, label %{{.*}}, label %vector.body
 entry:
@@ -355,19 +355,19 @@ for.cond.cleanup:
 
 for.body:
   %i.09 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %B, i32 %i.09
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %C, i32 %i.09
-  %1 = load float, float* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds float, ptr %B, i32 %i.09
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %C, i32 %i.09
+  %1 = load float, ptr %arrayidx1, align 4
   %add = fadd fast float %1, %0
-  %arrayidx2 = getelementptr inbounds float, float* %A, i32 %i.09
-  store float %add, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %A, i32 %i.09
+  store float %add, ptr %arrayidx2, align 4
   %add3 = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %add3, 431
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !10
 }
 
-define void @fpext_allowed(float* noalias nocapture %A, half* noalias nocapture readonly %B, float* noalias nocapture readonly %C) #0 {
+define void @fpext_allowed(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) #0 {
 ; CHECK-LABEL:        fpext_allowed(
 ; PREFER-FOLDING:     vector.body:
 ; PREFER-FOLDING-NOT: llvm.masked.load
@@ -381,20 +381,20 @@ for.cond.cleanup:
 
 for.body:
   %i.09 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds half, half* %B, i32 %i.09
-  %0 = load half, half* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds half, ptr %B, i32 %i.09
+  %0 = load half, ptr %arrayidx, align 2
   %conv = fpext half %0 to float
-  %arrayidx1 = getelementptr inbounds float, float* %C, i32 %i.09
-  %1 = load float, float* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %C, i32 %i.09
+  %1 = load float, ptr %arrayidx1, align 4
   %add = fadd fast float %1, %conv
-  %arrayidx2 = getelementptr inbounds float, float* %A, i32 %i.09
-  store float %add, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %A, i32 %i.09
+  store float %add, ptr %arrayidx2, align 4
   %add3 = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %add3, 431
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-define void @fptrunc_allowed(half* noalias nocapture %A, float* noalias nocapture readonly %B, float* noalias nocapture readonly %C) #0 {
+define void @fptrunc_allowed(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) #0 {
 ; CHECK-LABEL:        fptrunc_allowed(
 ; PREFER-FOLDING:     vector.body:
 ; PREFER-FOLDING-NOT: llvm.masked.load
@@ -408,14 +408,14 @@ for.cond.cleanup:
 
 for.body:
   %i.09 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %B, i32 %i.09
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %C, i32 %i.09
-  %1 = load float, float* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds float, ptr %B, i32 %i.09
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %C, i32 %i.09
+  %1 = load float, ptr %arrayidx1, align 4
   %add = fadd fast float %1, %0
   %conv = fptrunc float %add to half
-  %arrayidx2 = getelementptr inbounds half, half* %A, i32 %i.09
-  store half %conv, half* %arrayidx2, align 2
+  %arrayidx2 = getelementptr inbounds half, ptr %A, i32 %i.09
+  store half %conv, ptr %arrayidx2, align 2
   %add3 = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %add3, 431
   br i1 %exitcond, label %for.cond.cleanup, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll
index 01be12b824f9a..596e42e9f094d 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll
@@ -4,15 +4,15 @@
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-none-none-eabi"
 
-define void @pred_loop(i32* %off, i32* %data, i32* %dst, i32 %n) #0 {
+define void @pred_loop(ptr %off, ptr %data, ptr %dst, i32 %n) #0 {
 
 ; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %i.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
 ; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %add = add nuw nsw i32 %i.09, 1
-; CHECK-COST-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx = getelementptr inbounds i32, i32* %data, i32 %add
-; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %0 = load i32, i32* %arrayidx, align 4
+; CHECK-COST-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx = getelementptr inbounds i32, ptr %data, i32 %add
+; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %0 = load i32, ptr %arrayidx, align 4
 ; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %add1 = add nsw i32 %0, 5
-; CHECK-COST-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx2 = getelementptr inbounds i32, i32* %dst, i32 %i.09
-; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i32 %add1, i32* %arrayidx2, align 4
+; CHECK-COST-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx2 = getelementptr inbounds i32, ptr %dst, i32 %i.09
+; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i32 %add1, ptr %arrayidx2, align 4
 ; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %exitcond.not = icmp eq i32 %add, %n
 ; CHECK-COST-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   br i1 %exitcond.not, label %exit.loopexit, label %for.body
 ; CHECK-COST-NEXT: LV: Scalar loop costs: 5.
@@ -27,22 +27,22 @@ exit:                                 ; preds = %for.body, %entry
 for.body:                                         ; preds = %entry, %for.body
   %i.09 = phi i32 [ %add, %for.body ], [ 0, %entry ]
   %add = add nuw nsw i32 %i.09, 1
-  %arrayidx = getelementptr inbounds i32, i32* %data, i32 %add
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %data, i32 %add
+  %0 = load i32, ptr %arrayidx, align 4
   %add1 = add nsw i32 %0, 5
-  %arrayidx2 = getelementptr inbounds i32, i32* %dst, i32 %i.09
-  store i32 %add1, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %dst, i32 %i.09
+  store i32 %add1, ptr %arrayidx2, align 4
   %exitcond.not = icmp eq i32 %add, %n
   br i1 %exitcond.not, label %exit, label %for.body
 }
 
-define i32 @if_convert(i32* %a, i32* %b, i32 %start, i32 %end) #0 {
+define i32 @if_convert(ptr %a, ptr %b, i32 %start, i32 %end) #0 {
 
 ; CHECK-COST-2: LV: Found an estimated cost of 0 for VF 1 For instruction:   %i.032 = phi i32 [ %inc, %if.end ], [ %start, %for.body.preheader ]
-; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.032
-; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %0 = load i32, i32* %arrayidx, align 4
-; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx2 = getelementptr inbounds i32, i32* %b, i32 %i.032
-; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load i32, i32* %arrayidx2, align 4
+; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.032
+; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %0 = load i32, ptr %arrayidx, align 4
+; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx2 = getelementptr inbounds i32, ptr %b, i32 %i.032
+; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load i32, ptr %arrayidx2, align 4
 ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %cmp3 = icmp sgt i32 %0, %1
 ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   br i1 %cmp3, label %if.then, label %if.end
 ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %mul = mul nsw i32 %0, 5
@@ -50,10 +50,10 @@ define i32 @if_convert(i32* %a, i32* %b, i32 %start, i32 %end) #0 {
 ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   %factor = shl i32 %add, 1
 ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %sub = sub i32 %0, %1
 ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %add7 = add i32 %sub, %factor
-; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i32 %add7, i32* %arrayidx2, align 4
+; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i32 %add7, ptr %arrayidx2, align 4
 ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   br label %if.end
 ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   %k.0 = phi i32 [ %add, %if.then ], [ %0, %for.body ]
-; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i32 %k.0, i32* %arrayidx, align 4
+; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i32 %k.0, ptr %arrayidx, align 4
 ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %inc = add nsw i32 %i.032, 1
 ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction:   %exitcond.not = icmp eq i32 %inc, %end
 ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction:   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
@@ -74,10 +74,10 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 
 for.body:                                         ; preds = %for.body.preheader, %if.end
   %i.032 = phi i32 [ %inc, %if.end ], [ %start, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.032
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i32 %i.032
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.032
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i32 %i.032
+  %1 = load i32, ptr %arrayidx2, align 4
   %cmp3 = icmp sgt i32 %0, %1
   br i1 %cmp3, label %if.then, label %if.end
 
@@ -87,12 +87,12 @@ if.then:                                          ; preds = %for.body
   %factor = shl i32 %add, 1
   %sub = sub i32 %0, %1
   %add7 = add i32 %sub, %factor
-  store i32 %add7, i32* %arrayidx2, align 4
+  store i32 %add7, ptr %arrayidx2, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %for.body
   %k.0 = phi i32 [ %add, %if.then ], [ %0, %for.body ]
-  store i32 %k.0, i32* %arrayidx, align 4
+  store i32 %k.0, ptr %arrayidx, align 4
   %inc = add nsw i32 %i.032, 1
   %exitcond.not = icmp eq i32 %inc, %end
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll
index ef9272ef47add..038725db279bb 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll
@@ -3,7 +3,7 @@
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
-define arm_aapcs_vfpcc i32 @minmaxval4(i32* nocapture readonly %x, i32* nocapture %minp, i32 %N) {
+define arm_aapcs_vfpcc i32 @minmaxval4(ptr nocapture readonly %x, ptr nocapture %minp, i32 %N) {
 ; CHECK-LABEL: @minmaxval4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP26_NOT:%.*]] = icmp eq i32 [[N:%.*]], 0
@@ -18,9 +18,8 @@ define arm_aapcs_vfpcc i32 @minmaxval4(i32* nocapture readonly %x, i32* nocaptur
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI1]])
 ; CHECK-NEXT:    [[TMP3]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -39,14 +38,14 @@ define arm_aapcs_vfpcc i32 @minmaxval4(i32* nocapture readonly %x, i32* nocaptur
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    [[MAX_0_LCSSA:%.*]] = phi i32 [ -2147483648, [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[MIN_0_LCSSA:%.*]] = phi i32 [ 2147483647, [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    store i32 [[MIN_0_LCSSA]], i32* [[MINP:%.*]], align 4
+; CHECK-NEXT:    store i32 [[MIN_0_LCSSA]], ptr [[MINP:%.*]], align 4
 ; CHECK-NEXT:    ret i32 [[MAX_0_LCSSA]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_029:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[MIN_028:%.*]] = phi i32 [ [[TMP9]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[MAX_027:%.*]] = phi i32 [ [[TMP8]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_029]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_029]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TMP8]] = call i32 @llvm.smax.i32(i32 [[TMP7]], i32 [[MAX_027]])
 ; CHECK-NEXT:    [[TMP9]] = call i32 @llvm.smin.i32(i32 [[TMP7]], i32 [[MIN_028]])
 ; CHECK-NEXT:    [[INC]] = add nuw i32 [[I_029]], 1
@@ -60,15 +59,15 @@ entry:
 for.cond.cleanup:                                 ; preds = %for.body, %entry
   %max.0.lcssa = phi i32 [ -2147483648, %entry ], [ %cond, %for.body ]
   %min.0.lcssa = phi i32 [ 2147483647, %entry ], [ %cond9, %for.body ]
-  store i32 %min.0.lcssa, i32* %minp, align 4
+  store i32 %min.0.lcssa, ptr %minp, align 4
   ret i32 %max.0.lcssa
 
 for.body:                                         ; preds = %entry, %for.body
   %i.029 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %min.028 = phi i32 [ %cond9, %for.body ], [ 2147483647, %entry ]
   %max.027 = phi i32 [ %cond, %for.body ], [ -2147483648, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.029
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.029
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, %max.027
   %cond = select i1 %cmp1, i32 %0, i32 %max.027
   %cmp4 = icmp slt i32 %0, %min.028

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll
index bed6f9afe83fa..9f9ee26f8363f 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll
@@ -9,11 +9,11 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 ;   ARMHWLoops: Trip count does not fit into 32bits
 ;   preferPredicateOverEpilogue: hardware-loop is not profitable.
 ;
-define dso_local void @tail_folding(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) {
+define dso_local void @tail_folding(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) {
 ; CHECK-LABEL: tail_folding(
 ; CHECK:       vector.body:
-; CHECK-NOT:   call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(
-; CHECK-NOT:   call void @llvm.masked.store.v4i32.p0v4i32(
+; CHECK-NOT:   call <4 x i32> @llvm.masked.load.v4i32.p0(
+; CHECK-NOT:   call void @llvm.masked.store.v4i32.p0(
 ; CHECK:       br i1 %{{.*}}, label %{{.*}}, label %vector.body, !llvm.loop [[VEC_LOOP1:![0-9]+]]
 ;
 ; CHECK:       br i1 %{{.*}}, label %{{.*}}, label %for.body, !llvm.loop [[SCALAR_LOOP1:![0-9]+]]
@@ -25,13 +25,13 @@ for.cond.cleanup:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 430
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -40,16 +40,16 @@ for.body:
 ; The same test case but now with predicate.enable = true should get
 ; tail-folded.
 ;
-define dso_local void @predicate_loop_hint(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) {
+define dso_local void @predicate_loop_hint(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) {
 ; CHECK-LABEL: predicate_loop_hint(
 ; CHECK:       vector.body:
 ; CHECK:         %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; CHECK:         %[[ELEM0:.*]] = add i64 %index, 0
 ; CHECK:         %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %[[ELEM0]], i64 430)
-; CHECK:         %[[WML1:.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}<4 x i1> %active.lane.mask
-; CHECK:         %[[WML2:.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}<4 x i1> %active.lane.mask
+; CHECK:         %[[WML1:.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}<4 x i1> %active.lane.mask
+; CHECK:         %[[WML2:.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}<4 x i1> %active.lane.mask
 ; CHECK:         %[[ADD:.*]] = add nsw <4 x i32> %[[WML2]], %[[WML1]]
-; CHECK:         call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %[[ADD]], {{.*}}<4 x i1> %active.lane.mask
+; CHECK:         call void @llvm.masked.store.v4i32.p0(<4 x i32> %[[ADD]], {{.*}}<4 x i1> %active.lane.mask
 ; CHECK:         %index.next = add i64 %index, 4
 ; CHECK:         br i1 %{{.*}}, label %{{.*}}, label %vector.body, !llvm.loop [[VEC_LOOP2:![0-9]+]]
 ;
@@ -62,13 +62,13 @@ for.cond.cleanup:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 430
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !6

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll
index 33693e930dd77..8f3cfc5b20ee9 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll
@@ -11,11 +11,11 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 ; get tail-folded, except with -prefer-predicate-over-epilog which then
 ; overrules this.
 ;
-define dso_local void @flag_overrules_hint(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) local_unnamed_addr #0 {
+define dso_local void @flag_overrules_hint(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) local_unnamed_addr #0 {
 ; CHECK-LABEL: flag_overrules_hint(
 ; CHECK:       vector.body:
-; CHECK-NOT:   @llvm.masked.load.v8i32.p0v8i32(
-; CHECK-NOT:   @llvm.masked.store.v8i32.p0v8i32(
+; CHECK-NOT:   @llvm.masked.load.v8i32.p0(
+; CHECK-NOT:   @llvm.masked.store.v8i32.p0(
 ; CHECK:       br i1 %{{.*}}, label {{.*}}, label %vector.body
 
 ; PREDFLAG-LABEL: flag_overrules_hint(
@@ -23,10 +23,10 @@ define dso_local void @flag_overrules_hint(i32* noalias nocapture %A, i32* noali
 ; PREDFLAG:  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; PREDFLAG:  %[[ELEM0:.*]] = add i64 %index, 0
 ; PREDFLAG:  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %[[ELEM0]], i64 430)
-; PREDFLAG:  %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %active.lane.mask
-; PREDFLAG:  %wide.masked.load1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %active.lane.mask
+; PREDFLAG:  %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %active.lane.mask
+; PREDFLAG:  %wide.masked.load1 = call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %active.lane.mask
 ; PREDFLAG:  %{{.*}} = add nsw <4 x i32> %wide.masked.load1, %wide.masked.load
-; PREDFLAG:  call void @llvm.masked.store.v4i32.p0v4i32({{.*}}, <4 x i1> %active.lane.mask
+; PREDFLAG:  call void @llvm.masked.store.v4i32.p0({{.*}}, <4 x i1> %active.lane.mask
 ; PREDFLAG:  %index.next = add i64 %index, 4
 ; PREDFLAG:  %[[CMP:.*]] = icmp eq i64 %index.next, 432
 ; PREDFLAG:  br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !0
@@ -38,19 +38,19 @@ for.cond.cleanup:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 430
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !10
 }
 
-define dso_local void @interleave4(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+define dso_local void @interleave4(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
 ; PREDFLAG-LABEL: interleave4(
 ; PREDFLAG:  %[[ADD1:.*]] = add i32 %index, 0
 ; PREDFLAG:  %[[ADD2:.*]] = add i32 %index, 4
@@ -61,19 +61,19 @@ define dso_local void @interleave4(i32* noalias nocapture %A, i32* noalias nocap
 ; PREDFLAG:  %[[ALM3:active.lane.mask.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %[[ADD3]], i32 %N)
 ; PREDFLAG:  %[[ALM4:active.lane.mask.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %[[ADD4]], i32 %N)
 ;
-; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM1]],{{.*}}
-; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM2]],{{.*}}
-; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM3]],{{.*}}
-; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM4]],{{.*}}
-; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM1]],{{.*}}
-; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM2]],{{.*}}
-; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM3]],{{.*}}
-; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM4]],{{.*}}
+; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %[[ALM1]],{{.*}}
+; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %[[ALM2]],{{.*}}
+; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %[[ALM3]],{{.*}}
+; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %[[ALM4]],{{.*}}
+; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %[[ALM1]],{{.*}}
+; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %[[ALM2]],{{.*}}
+; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %[[ALM3]],{{.*}}
+; PREDFLAG:  call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %[[ALM4]],{{.*}}
 ;
-; PREDFLAG:  call void @llvm.masked.store.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM1]])
-; PREDFLAG:  call void @llvm.masked.store.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM2]])
-; PREDFLAG:  call void @llvm.masked.store.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM3]])
-; PREDFLAG:  call void @llvm.masked.store.v4i32.p0v4i32({{.*}}, <4 x i1> %[[ALM4]])
+; PREDFLAG:  call void @llvm.masked.store.v4i32.p0({{.*}}, <4 x i1> %[[ALM1]])
+; PREDFLAG:  call void @llvm.masked.store.v4i32.p0({{.*}}, <4 x i1> %[[ALM2]])
+; PREDFLAG:  call void @llvm.masked.store.v4i32.p0({{.*}}, <4 x i1> %[[ALM3]])
+; PREDFLAG:  call void @llvm.masked.store.v4i32.p0({{.*}}, <4 x i1> %[[ALM4]])
 ;
 entry:
   %cmp8 = icmp sgt i32 %N, 0
@@ -90,13 +90,13 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.09
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %C, i32 %i.09
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.09
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %C, i32 %i.09
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %i.09
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %i.09
+  store i32 %add, ptr %arrayidx2, align 4
   %inc = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %inc, %N
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !14

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reductions-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reductions-allowed.ll
index 4cf248940a57b..0313cd7b8a710 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reductions-allowed.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reductions-allowed.ll
@@ -12,7 +12,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 ; Check that this reduction is allowed, except when reductions are disabled on
 ; the command line.
 ;
-define dso_local i32 @i32_add_reduction(i32* noalias nocapture readonly %B, i32 %N) local_unnamed_addr #0 {
+define dso_local i32 @i32_add_reduction(ptr noalias nocapture readonly %B, i32 %N) local_unnamed_addr #0 {
 ; COMMON-LABEL: i32_add_reduction(
 ; COMMON:       entry:
 ; CHECK:        @llvm.get.active.lane.mask
@@ -36,8 +36,8 @@ for.cond.cleanup:
 for.body:
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %S.07 = phi i32 [ %add, %for.body ], [ 1, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.08
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.08
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add nsw i32 %0, %S.07
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, %N

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/width-detect.ll b/llvm/test/Transforms/LoopVectorize/ARM/width-detect.ll
index b35d28759b5cb..e5dd922096ea2 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/width-detect.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/width-detect.ll
@@ -6,15 +6,15 @@ target triple = "thumbv7-apple-ios3.0.0"
 ;CHECK:foo_F32
 ;CHECK: <4 x float>
 ;CHECK:ret
-define float @foo_F32(float* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+define float @foo_F32(ptr nocapture %A, i32 %n) nounwind uwtable readonly ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %prod.01 = phi float [ %4, %.lr.ph ], [ 0.000000e+00, %0 ]
-  %2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %3 = load float, float* %2, align 8
+  %2 = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %3 = load float, ptr %2, align 8
   %4 = fmul fast float %prod.01, %3
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -29,15 +29,15 @@ define float @foo_F32(float* nocapture %A, i32 %n) nounwind uwtable readonly ssp
 ;CHECK:foo_I8
 ;CHECK: xor <16 x i8>
 ;CHECK:ret
-define signext i8 @foo_I8(i8* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+define signext i8 @foo_I8(ptr nocapture %A, i32 %n) nounwind uwtable readonly ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %red.01 = phi i8 [ %4, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv
-  %3 = load i8, i8* %2, align 1
+  %2 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv
+  %3 = load i8, ptr %2, align 1
   %4 = xor i8 %3, %red.01
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32

diff  --git a/llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll b/llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll
index f109ca8b8e64f..de3dac08b1503 100644
--- a/llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll
+++ b/llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll
@@ -18,7 +18,7 @@ target triple = "hexagon"
 ; CHECK-NOT: load <{{.*}} x i32>
 
 
-define void @test1(i32* %arg, i32 %N) #0 {
+define void @test1(ptr %arg, i32 %N) #0 {
 entry:
   %tmp = alloca i8
   br label %loop
@@ -27,27 +27,27 @@ loop:                                              ; preds = %bb2, %bb
   %iv = phi i32 [ %iv.next, %loop], [ 0, %entry ]
   %idx.mul = mul nuw nsw i32 %iv, 7
   %idx.start = add nuw nsw i32 %idx.mul, 1
-  %tmp6 = getelementptr inbounds i32, i32* %arg, i32 %idx.start
-  %tmp7 = load i32, i32* %tmp6, align 4
+  %tmp6 = getelementptr inbounds i32, ptr %arg, i32 %idx.start
+  %tmp7 = load i32, ptr %tmp6, align 4
   %tmp8 = add nuw nsw i32 %idx.start, 1
-  %tmp9 = getelementptr inbounds i32, i32* %arg, i32 %tmp8
-  %tmp10 = load i32, i32* %tmp9, align 4
+  %tmp9 = getelementptr inbounds i32, ptr %arg, i32 %tmp8
+  %tmp10 = load i32, ptr %tmp9, align 4
   %tmp11 = add nuw nsw i32 %idx.start, 2
-  %tmp12 = getelementptr inbounds i32, i32* %arg, i32 %tmp11
-  %tmp13 = load i32, i32* %tmp12, align 4
+  %tmp12 = getelementptr inbounds i32, ptr %arg, i32 %tmp11
+  %tmp13 = load i32, ptr %tmp12, align 4
   %tmp14 = add nuw nsw i32 %idx.start, 3
-  %tmp15 = getelementptr inbounds i32, i32* %arg, i32 %tmp14
-  %tmp16 = load i32, i32* %tmp15, align 4
+  %tmp15 = getelementptr inbounds i32, ptr %arg, i32 %tmp14
+  %tmp16 = load i32, ptr %tmp15, align 4
   %tmp18 = add nuw nsw i32 %idx.start, 4
-  %tmp19 = getelementptr inbounds i32, i32* %arg, i32 %tmp18
-  %tmp20 = load i32, i32* %tmp19, align 4
+  %tmp19 = getelementptr inbounds i32, ptr %arg, i32 %tmp18
+  %tmp20 = load i32, ptr %tmp19, align 4
   %tmp21 = add nuw nsw i32 %idx.start, 5
-  %tmp22 = getelementptr inbounds i32, i32* %arg, i32 %tmp21
-  %tmp23 = load i32, i32* %tmp22, align 4
+  %tmp22 = getelementptr inbounds i32, ptr %arg, i32 %tmp21
+  %tmp23 = load i32, ptr %tmp22, align 4
   %tmp25 = add nuw nsw i32 %idx.start, 6
-  %tmp26 = getelementptr inbounds i32, i32* %arg, i32 %tmp25
-  %tmp27 = load i32, i32* %tmp26, align 4
-  store i8 0, i8* %tmp, align 1
+  %tmp26 = getelementptr inbounds i32, ptr %arg, i32 %tmp25
+  %tmp27 = load i32, ptr %tmp26, align 4
+  store i8 0, ptr %tmp, align 1
   %iv.next= add nuw nsw i32 %iv, 1
   %exit.cond = icmp eq i32 %iv.next, %N
   br i1 %exit.cond, label %exit, label %loop
@@ -62,7 +62,7 @@ exit:                                             ; preds = %loop
 ; CHECK-LABEL: @test2
 ; CHECK: vector.body:
 ; CHECK-NOT: load <{{.*}} x i32>
-define void @test2(i32* %arg) #1 {
+define void @test2(ptr %arg) #1 {
 entry:
   %tmp = alloca i8
   br label %loop
@@ -70,18 +70,18 @@ entry:
 loop:                                              ; preds = %bb2, %bb
   %iv = phi i32 [ %iv.next, %loop], [ 0, %entry ]
   %idx.start = mul nuw nsw i32 %iv, 5
-  %tmp6 = getelementptr inbounds i32, i32* %arg, i32 %idx.start
-  %tmp7 = load i32, i32* %tmp6, align 4
+  %tmp6 = getelementptr inbounds i32, ptr %arg, i32 %idx.start
+  %tmp7 = load i32, ptr %tmp6, align 4
   %tmp8 = add nuw nsw i32 %idx.start, 1
-  %tmp9 = getelementptr inbounds i32, i32* %arg, i32 %tmp8
-  %tmp10 = load i32, i32* %tmp9, align 4
+  %tmp9 = getelementptr inbounds i32, ptr %arg, i32 %tmp8
+  %tmp10 = load i32, ptr %tmp9, align 4
   %tmp11 = add nuw nsw i32 %idx.start, 2
-  %tmp12 = getelementptr inbounds i32, i32* %arg, i32 %tmp11
-  %tmp13 = load i32, i32* %tmp12, align 4
+  %tmp12 = getelementptr inbounds i32, ptr %arg, i32 %tmp11
+  %tmp13 = load i32, ptr %tmp12, align 4
   %tmp14 = add nuw nsw i32 %idx.start, 3
-  %tmp15 = getelementptr inbounds i32, i32* %arg, i32 %tmp14
-  %tmp16 = load i32, i32* %tmp15, align 4
-  store i8 0, i8* %tmp, align 1
+  %tmp15 = getelementptr inbounds i32, ptr %arg, i32 %tmp14
+  %tmp16 = load i32, ptr %tmp15, align 4
+  store i8 0, ptr %tmp, align 1
   %iv.next= add nuw nsw i32 %iv, 1
   %exit.cond = icmp eq i32 %iv.next, 128
   br i1 %exit.cond, label %exit, label %loop

diff  --git a/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll
index 6f4a23cc1db7f..4c7a7d31c5e72 100644
--- a/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll
@@ -16,7 +16,7 @@ entry:
 loop:
   %g.016 = phi i32 [ 0, %entry ], [ %g.1.lcssa, %loop ]
   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-  %0 = load i8, i8* undef, align 1
+  %0 = load i8, ptr undef, align 1
   %g.1.lcssa = add i32 %g.016, undef
   %iv.next = add nsw i32 %iv, 1
   %exitcond = icmp eq i32 %iv.next, 0

diff  --git a/llvm/test/Transforms/LoopVectorize/Hexagon/minimum-vf.ll b/llvm/test/Transforms/LoopVectorize/Hexagon/minimum-vf.ll
index 78c98c7c705a2..4ff7ad9ab4390 100644
--- a/llvm/test/Transforms/LoopVectorize/Hexagon/minimum-vf.ll
+++ b/llvm/test/Transforms/LoopVectorize/Hexagon/minimum-vf.ll
@@ -8,19 +8,18 @@
 target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
 target triple = "hexagon"
 
-%s.0 = type { i8*, i32, i32, i32, i32 }
+%s.0 = type { ptr, i32, i32, i32, i32 }
 
-@g0 = external dso_local local_unnamed_addr global %s.0**, align 4
+@g0 = external dso_local local_unnamed_addr global ptr, align 4
 
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #0
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #0
 
 ; Function Attrs: nounwind
-define hidden fastcc void @f0(i8* nocapture %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i8 zeroext %a5) unnamed_addr #1 {
+define hidden fastcc void @f0(ptr nocapture %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i8 zeroext %a5) unnamed_addr #1 {
 b0:
   %v0 = alloca [4 x [9 x i16]], align 8
-  %v1 = bitcast [4 x [9 x i16]]* %v0 to i8*
-  call void @llvm.lifetime.start.p0i8(i64 72, i8* nonnull %v1) #2
+  call void @llvm.lifetime.start.p0(i64 72, ptr nonnull %v0) #2
   %v2 = add i32 %a1, -2
   %v3 = add i32 %a3, -9
   %v4 = icmp ugt i32 %v2, %v3
@@ -28,12 +27,11 @@ b0:
   %v6 = add i32 %a4, -9
   %v7 = icmp ugt i32 %v5, %v6
   %v8 = or i1 %v4, %v7
-  %v9 = load %s.0**, %s.0*** @g0, align 4, !tbaa !1
+  %v9 = load ptr, ptr @g0, align 4, !tbaa !1
   %v10 = zext i8 %a5 to i32
-  %v11 = getelementptr inbounds %s.0*, %s.0** %v9, i32 %v10
-  %v12 = load %s.0*, %s.0** %v11, align 4, !tbaa !1
-  %v13 = getelementptr inbounds %s.0, %s.0* %v12, i32 0, i32 0
-  %v14 = load i8*, i8** %v13, align 4, !tbaa !5
+  %v11 = getelementptr inbounds ptr, ptr %v9, i32 %v10
+  %v12 = load ptr, ptr %v11, align 4, !tbaa !1
+  %v14 = load ptr, ptr %v12, align 4, !tbaa !5
   br i1 %v8, label %b1, label %b2
 
 b1:                                               ; preds = %b1, %b0
@@ -45,41 +43,41 @@ b1:                                               ; preds = %b1, %b0
   %v20 = select i1 %v17, i32 0, i32 %v19
   %v21 = mul i32 %v20, %a3
   %v22 = add i32 97, %v21
-  %v23 = getelementptr inbounds i8, i8* %v14, i32 %v22
-  %v24 = load i8, i8* %v23, align 1, !tbaa !8
+  %v23 = getelementptr inbounds i8, ptr %v14, i32 %v22
+  %v24 = load i8, ptr %v23, align 1, !tbaa !8
   %v25 = zext i8 %v24 to i32
   %v26 = add i32 101, %v21
-  %v27 = getelementptr inbounds i8, i8* %v14, i32 %v26
-  %v28 = load i8, i8* %v27, align 1, !tbaa !8
+  %v27 = getelementptr inbounds i8, ptr %v14, i32 %v26
+  %v28 = load i8, ptr %v27, align 1, !tbaa !8
   %v29 = zext i8 %v28 to i32
   %v30 = mul nsw i32 %v29, -5
   %v31 = add nsw i32 %v30, %v25
   %v32 = add i32 106, %v21
-  %v33 = getelementptr inbounds i8, i8* %v14, i32 %v32
-  %v34 = load i8, i8* %v33, align 1, !tbaa !8
+  %v33 = getelementptr inbounds i8, ptr %v14, i32 %v32
+  %v34 = load i8, ptr %v33, align 1, !tbaa !8
   %v35 = zext i8 %v34 to i32
   %v36 = mul nuw nsw i32 %v35, 20
   %v37 = add nsw i32 %v36, %v31
   %v38 = add i32 111, %v21
-  %v39 = getelementptr inbounds i8, i8* %v14, i32 %v38
-  %v40 = load i8, i8* %v39, align 1, !tbaa !8
+  %v39 = getelementptr inbounds i8, ptr %v14, i32 %v38
+  %v40 = load i8, ptr %v39, align 1, !tbaa !8
   %v41 = zext i8 %v40 to i32
   %v42 = mul nuw nsw i32 %v41, 20
   %v43 = add nsw i32 %v42, %v37
   %v44 = add i32 116, %v21
-  %v45 = getelementptr inbounds i8, i8* %v14, i32 %v44
-  %v46 = load i8, i8* %v45, align 1, !tbaa !8
+  %v45 = getelementptr inbounds i8, ptr %v14, i32 %v44
+  %v46 = load i8, ptr %v45, align 1, !tbaa !8
   %v47 = zext i8 %v46 to i32
   %v48 = mul nsw i32 %v47, -5
   %v49 = add nsw i32 %v48, %v43
   %v50 = add i32 120, %v21
-  %v51 = getelementptr inbounds i8, i8* %v14, i32 %v50
-  %v52 = load i8, i8* %v51, align 1, !tbaa !8
+  %v51 = getelementptr inbounds i8, ptr %v14, i32 %v50
+  %v52 = load i8, ptr %v51, align 1, !tbaa !8
   %v53 = zext i8 %v52 to i32
   %v54 = add nsw i32 %v49, %v53
   %v55 = trunc i32 %v54 to i16
-  %v56 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 0, i32 %v15
-  store i16 %v55, i16* %v56, align 2, !tbaa !9
+  %v56 = getelementptr inbounds [4 x [9 x i16]], ptr %v0, i32 0, i32 0, i32 %v15
+  store i16 %v55, ptr %v56, align 2, !tbaa !9
   %v57 = mul nsw i32 %v35, -5
   %v58 = add nsw i32 %v57, %v29
   %v59 = add nsw i32 %v42, %v58
@@ -88,13 +86,13 @@ b1:                                               ; preds = %b1, %b0
   %v62 = mul nsw i32 %v53, -5
   %v63 = add nsw i32 %v62, %v61
   %v64 = add i32 125, %v21
-  %v65 = getelementptr inbounds i8, i8* %v14, i32 %v64
-  %v66 = load i8, i8* %v65, align 1, !tbaa !8
+  %v65 = getelementptr inbounds i8, ptr %v14, i32 %v64
+  %v66 = load i8, ptr %v65, align 1, !tbaa !8
   %v67 = zext i8 %v66 to i32
   %v68 = add nsw i32 %v63, %v67
   %v69 = trunc i32 %v68 to i16
-  %v70 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 1, i32 %v15
-  store i16 %v69, i16* %v70, align 2, !tbaa !9
+  %v70 = getelementptr inbounds [4 x [9 x i16]], ptr %v0, i32 0, i32 1, i32 %v15
+  store i16 %v69, ptr %v70, align 2, !tbaa !9
   %v71 = mul nsw i32 %v41, -5
   %v72 = add nsw i32 %v71, %v35
   %v73 = add nsw i32 %v60, %v72
@@ -103,54 +101,54 @@ b1:                                               ; preds = %b1, %b0
   %v76 = mul nsw i32 %v67, -5
   %v77 = add nsw i32 %v76, %v75
   %v78 = add i32 130, %v21
-  %v79 = getelementptr inbounds i8, i8* %v14, i32 %v78
-  %v80 = load i8, i8* %v79, align 1, !tbaa !8
+  %v79 = getelementptr inbounds i8, ptr %v14, i32 %v78
+  %v80 = load i8, ptr %v79, align 1, !tbaa !8
   %v81 = zext i8 %v80 to i32
   %v82 = add nsw i32 %v77, %v81
   %v83 = trunc i32 %v82 to i16
-  %v84 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 2, i32 %v15
-  store i16 %v83, i16* %v84, align 2, !tbaa !9
+  %v84 = getelementptr inbounds [4 x [9 x i16]], ptr %v0, i32 0, i32 2, i32 %v15
+  store i16 %v83, ptr %v84, align 2, !tbaa !9
   %v85 = add i32 92, %v21
-  %v86 = getelementptr inbounds i8, i8* %v14, i32 %v85
-  %v87 = load i8, i8* %v86, align 1, !tbaa !8
+  %v86 = getelementptr inbounds i8, ptr %v14, i32 %v85
+  %v87 = load i8, ptr %v86, align 1, !tbaa !8
   %v88 = zext i8 %v87 to i16
   %v89 = add i32 135, %v21
-  %v90 = getelementptr inbounds i8, i8* %v14, i32 %v89
-  %v91 = load i8, i8* %v90, align 1, !tbaa !8
+  %v90 = getelementptr inbounds i8, ptr %v14, i32 %v89
+  %v91 = load i8, ptr %v90, align 1, !tbaa !8
   %v92 = zext i8 %v91 to i16
   %v93 = mul nsw i16 %v92, -5
   %v94 = add nsw i16 %v93, %v88
   %v95 = add i32 140, %v21
-  %v96 = getelementptr inbounds i8, i8* %v14, i32 %v95
-  %v97 = load i8, i8* %v96, align 1, !tbaa !8
+  %v96 = getelementptr inbounds i8, ptr %v14, i32 %v95
+  %v97 = load i8, ptr %v96, align 1, !tbaa !8
   %v98 = zext i8 %v97 to i16
   %v99 = mul nuw nsw i16 %v98, 20
   %v100 = add nsw i16 %v99, %v94
   %v101 = add i32 145, %v21
-  %v102 = getelementptr inbounds i8, i8* %v14, i32 %v101
-  %v103 = load i8, i8* %v102, align 1, !tbaa !8
+  %v102 = getelementptr inbounds i8, ptr %v14, i32 %v101
+  %v103 = load i8, ptr %v102, align 1, !tbaa !8
   %v104 = zext i8 %v103 to i16
   %v105 = mul nuw nsw i16 %v104, 20
   %v106 = add i16 %v105, %v100
   %v107 = add i32 150, %v21
-  %v108 = getelementptr inbounds i8, i8* %v14, i32 %v107
-  %v109 = load i8, i8* %v108, align 1, !tbaa !8
+  %v108 = getelementptr inbounds i8, ptr %v14, i32 %v107
+  %v109 = load i8, ptr %v108, align 1, !tbaa !8
   %v110 = zext i8 %v109 to i16
   %v111 = mul nsw i16 %v110, -5
   %v112 = add i16 %v111, %v106
   %v113 = add i32 154, %v21
-  %v114 = getelementptr inbounds i8, i8* %v14, i32 %v113
-  %v115 = load i8, i8* %v114, align 1, !tbaa !8
+  %v114 = getelementptr inbounds i8, ptr %v14, i32 %v113
+  %v115 = load i8, ptr %v114, align 1, !tbaa !8
   %v116 = zext i8 %v115 to i16
   %v117 = add i16 %v112, %v116
-  %v118 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 3, i32 %v15
-  store i16 %v117, i16* %v118, align 2, !tbaa !9
+  %v118 = getelementptr inbounds [4 x [9 x i16]], ptr %v0, i32 0, i32 3, i32 %v15
+  store i16 %v117, ptr %v118, align 2, !tbaa !9
   %v119 = add nuw nsw i32 %v15, 1
   %v120 = icmp eq i32 %v119, 19
   br i1 %v120, label %b2, label %b1
 
 b2:                                               ; preds = %b1, %b0
-  call void @llvm.lifetime.end.p0i8(i64 72, i8* nonnull %v1) #2
+  call void @llvm.lifetime.end.p0(i64 72, ptr nonnull %v0) #2
   ret void
 }
 

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/interleave_IC.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/interleave_IC.ll
index cb0888e2fde84..7121c85dd59be 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/interleave_IC.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/interleave_IC.ll
@@ -2,10 +2,10 @@
 ; RUN: opt < %s -passes='loop-vectorize' -S -mcpu=pwr9 -interleave-small-loop-scalar-reduction=true 2>&1 | FileCheck %s
 
 ; CHECK-LABEL: vector.body
-; CHECK: load double, double*
-; CHECK-NEXT: load double, double*
-; CHECK-NEXT: load double, double*
-; CHECK-NEXT: load double, double*
+; CHECK: load double, ptr
+; CHECK-NEXT: load double, ptr
+; CHECK-NEXT: load double, ptr
+; CHECK-NEXT: load double, ptr
 
 ; CHECK: fmul fast double
 ; CHECK-NEXT: fmul fast double
@@ -20,38 +20,36 @@
 target datalayout = "e-m:e-i64:64-n32:64"
 target triple = "powerpc64le-unknown-linux-gnu"
 
-define dso_local void @test(i32*** %arg, double** %arg1) align 2 {
+define dso_local void @test(ptr %arg, ptr %arg1) align 2 {
 bb:
-  %tpm15 = load i32**, i32*** %arg, align 8
-  %tpm19 = load double*, double** %arg1, align 8
+  %tpm15 = load ptr, ptr %arg, align 8
+  %tpm19 = load ptr, ptr %arg1, align 8
   br label %bb22
 bb22:                                             ; preds = %bb33, %bb
   %tpm26 = add i64 0, 1
   %tpm10 = alloca i32, align 8
-  %tpm27 = getelementptr inbounds i32, i32* %tpm10, i64 %tpm26
-  %tpm28 = getelementptr inbounds i32*, i32** %tpm15, i64 0
-  %tpm29 = load i32*, i32** %tpm28, align 8
+  %tpm27 = getelementptr inbounds i32, ptr %tpm10, i64 %tpm26
+  %tpm29 = load ptr, ptr %tpm15, align 8
   %tpm17 = alloca double, align 8
-  %tpm32 = getelementptr inbounds double, double* %tpm17, i64 %tpm26
+  %tpm32 = getelementptr inbounds double, ptr %tpm17, i64 %tpm26
   br label %bb40
 bb33:                                             ; preds = %bb40
-  %tpm35 = getelementptr inbounds double, double* %tpm19, i64 0
   %tpm37 = fsub fast double 0.000000e+00, %tpm50
-  store double %tpm37, double* %tpm35, align 8
+  store double %tpm37, ptr %tpm19, align 8
   br label %bb22
 bb40:                                             ; preds = %bb40, %bb22
-  %tpm41 = phi i32* [ %tpm51, %bb40 ], [ %tpm27, %bb22 ]
-  %tpm42 = phi double* [ %tpm52, %bb40 ], [ %tpm32, %bb22 ]
+  %tpm41 = phi ptr [ %tpm51, %bb40 ], [ %tpm27, %bb22 ]
+  %tpm42 = phi ptr [ %tpm52, %bb40 ], [ %tpm32, %bb22 ]
   %tpm43 = phi double [ %tpm50, %bb40 ], [ 0.000000e+00, %bb22 ]
-  %tpm44 = load double, double* %tpm42, align 8
-  %tpm45 = load i32, i32* %tpm41, align 4
+  %tpm44 = load double, ptr %tpm42, align 8
+  %tpm45 = load i32, ptr %tpm41, align 4
   %tpm46 = zext i32 %tpm45 to i64
-  %tpm47 = getelementptr inbounds double, double* %tpm19, i64 %tpm46
-  %tpm48 = load double, double* %tpm47, align 8
+  %tpm47 = getelementptr inbounds double, ptr %tpm19, i64 %tpm46
+  %tpm48 = load double, ptr %tpm47, align 8
   %tpm49 = fmul fast double %tpm48, %tpm44
   %tpm50 = fadd fast double %tpm49, %tpm43
-  %tpm51 = getelementptr inbounds i32, i32* %tpm41, i64 1
-  %tpm52 = getelementptr inbounds double, double* %tpm42, i64 1
-  %tpm53 = icmp eq i32* %tpm51, %tpm29
+  %tpm51 = getelementptr inbounds i32, ptr %tpm41, i64 1
+  %tpm52 = getelementptr inbounds double, ptr %tpm42, i64 1
+  %tpm53 = icmp eq ptr %tpm51, %tpm29
   br i1 %tpm53, label %bb33, label %bb40
 }

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll
index 0a3a4eb3e0c8f..297de36608e25 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll
@@ -10,7 +10,7 @@
 ; CHECK-LABEL: @eddy_diff_caleddy_
 ; CHECK-NOT: vector.memcheck
 
-define fastcc void @eddy_diff_caleddy_(i64* %wet_cl, i64 %0, i32 %ncol.cast.val) {
+define fastcc void @eddy_diff_caleddy_(ptr %wet_cl, i64 %0, i32 %ncol.cast.val) {
 entry:
   %trip.count = add nuw i32 %ncol.cast.val, 1
   %wide.trip.count = zext i32 %ncol.cast.val to i64
@@ -31,53 +31,41 @@ loop.body:
   %indvars.iv774 = phi i64 [ 0, %entry ], [ %indvars.iv.next775, %loop.body ]
   %12 = add nsw i64 %indvars.iv774, -5
   %13 = add i64 %12, %0
-  %14 = getelementptr i64, i64* %wet_cl, i64 %13
-  %15 = bitcast i64* %14 to double*
-  store double 0.000000e+00, double* %15, align 8
-  %16 = add i64 %12, %1
-  %17 = getelementptr i64, i64* %wet_cl, i64 %16
-  %18 = bitcast i64* %17 to double*
-  store double 0.000000e+00, double* %18, align 8
-  %19 = add i64 %12, %2
-  %20 = getelementptr i64, i64* %wet_cl, i64 %19
-  %21 = bitcast i64* %20 to double*
-  store double 0.000000e+00, double* %21, align 8
-  %22 = add i64 %12, %3
-  %23 = getelementptr i64, i64* %wet_cl, i64 %22
-  %24 = bitcast i64* %23 to double*
-  store double 0.000000e+00, double* %24, align 8
-  %25 = add i64 %12, %4
-  %26 = getelementptr i64, i64* %wet_cl, i64 %25
-  %27 = bitcast i64* %26 to double*
-  store double 0.000000e+00, double* %27, align 8
-  %28 = add i64 %12, %5
-  %29 = getelementptr i64, i64* %wet_cl, i64 %28
-  %30 = bitcast i64* %29 to double*
-  store double 0.000000e+00, double* %30, align 8
-  %31 = add i64 %12, %6
-  %32 = getelementptr i64, i64* %wet_cl, i64 %31
-  %33 = bitcast i64* %32 to double*
-  store double 0.000000e+00, double* %33, align 8
-  %34 = add i64 %12, %7
-  %35 = getelementptr i64, i64* %wet_cl, i64 %34
-  %36 = bitcast i64* %35 to double*
-  store double 0.000000e+00, double* %36, align 8
-  %37 = add i64 %12, %8
-  %38 = getelementptr i64, i64* %wet_cl, i64 %37
-  %39 = bitcast i64* %38 to double*
-  store double 0.000000e+00, double* %39, align 8
-  %40 = add i64 %12, %9
-  %41 = getelementptr i64, i64* %wet_cl, i64 %40
-  %42 = bitcast i64* %41 to double*
-  store double 0.000000e+00, double* %42, align 8
-  %43 = add i64 %12, %10
-  %44 = getelementptr i64, i64* %wet_cl, i64 %43
-  %45 = bitcast i64* %44 to double*
-  store double 0.000000e+00, double* %45, align 8
-  %46 = add i64 %12, %11
-  %47 = getelementptr i64, i64* %wet_cl, i64 %46
-  %48 = bitcast i64* %47 to double*
-  store double 0.000000e+00, double* %48, align 8
+  %14 = getelementptr i64, ptr %wet_cl, i64 %13
+  store double 0.000000e+00, ptr %14, align 8
+  %15 = add i64 %12, %1
+  %16 = getelementptr i64, ptr %wet_cl, i64 %15
+  store double 0.000000e+00, ptr %16, align 8
+  %17 = add i64 %12, %2
+  %18 = getelementptr i64, ptr %wet_cl, i64 %17
+  store double 0.000000e+00, ptr %18, align 8
+  %19 = add i64 %12, %3
+  %20 = getelementptr i64, ptr %wet_cl, i64 %19
+  store double 0.000000e+00, ptr %20, align 8
+  %21 = add i64 %12, %4
+  %22 = getelementptr i64, ptr %wet_cl, i64 %21
+  store double 0.000000e+00, ptr %22, align 8
+  %23 = add i64 %12, %5
+  %24 = getelementptr i64, ptr %wet_cl, i64 %23
+  store double 0.000000e+00, ptr %24, align 8
+  %25 = add i64 %12, %6
+  %26 = getelementptr i64, ptr %wet_cl, i64 %25
+  store double 0.000000e+00, ptr %26, align 8
+  %27 = add i64 %12, %7
+  %28 = getelementptr i64, ptr %wet_cl, i64 %27
+  store double 0.000000e+00, ptr %28, align 8
+  %29 = add i64 %12, %8
+  %30 = getelementptr i64, ptr %wet_cl, i64 %29
+  store double 0.000000e+00, ptr %30, align 8
+  %31 = add i64 %12, %9
+  %32 = getelementptr i64, ptr %wet_cl, i64 %31
+  store double 0.000000e+00, ptr %32, align 8
+  %33 = add i64 %12, %10
+  %34 = getelementptr i64, ptr %wet_cl, i64 %33
+  store double 0.000000e+00, ptr %34, align 8
+  %35 = add i64 %12, %11
+  %36 = getelementptr i64, ptr %wet_cl, i64 %35
+  store double 0.000000e+00, ptr %36, align 8
   %indvars.iv.next775 = add nuw nsw i64 %indvars.iv774, 1
   %exitcond778.not = icmp eq i64 %indvars.iv.next775, %wide.trip.count
   br i1 %exitcond778.not, label %loop.end, label %loop.body

diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
index ab945fdad40c1..a83bf7d41569f 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
@@ -19,7 +19,7 @@
 target datalayout = "e-m:e-i64:64-n32:64"
 target triple = "powerpc64le-ibm-linux-gnu"
 
-define void @QLA_F3_r_veq_norm2_V(float* noalias nocapture %r, [3 x { float, float }]* noalias nocapture readonly %a, i32 signext %n) #0 {
+define void @QLA_F3_r_veq_norm2_V(ptr noalias nocapture %r, ptr noalias nocapture readonly %a, i32 signext %n) #0 {
 entry:
   %cmp24 = icmp sgt i32 %n, 0
   br i1 %cmp24, label %for.cond1.preheader.preheader, label %for.end13
@@ -30,28 +30,28 @@ for.cond1.preheader.preheader:                    ; preds = %entry
 for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.cond1.preheader
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.cond1.preheader ], [ 0, %for.cond1.preheader.preheader ]
   %sum.026 = phi double [ %add10.2, %for.cond1.preheader ], [ 0.000000e+00, %for.cond1.preheader.preheader ]
-  %arrayidx5.realp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 0, i32 0
-  %arrayidx5.real = load float, float* %arrayidx5.realp, align 8
-  %arrayidx5.imagp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 0, i32 1
-  %arrayidx5.imag = load float, float* %arrayidx5.imagp, align 8
+  %arrayidx5.realp = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 0, i32 0
+  %arrayidx5.real = load float, ptr %arrayidx5.realp, align 8
+  %arrayidx5.imagp = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 0, i32 1
+  %arrayidx5.imag = load float, ptr %arrayidx5.imagp, align 8
   %mul = fmul fast float %arrayidx5.real, %arrayidx5.real
   %mul9 = fmul fast float %arrayidx5.imag, %arrayidx5.imag
   %add = fadd fast float %mul9, %mul
   %conv = fpext float %add to double
   %add10 = fadd fast double %conv, %sum.026
-  %arrayidx5.realp.1 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 1, i32 0
-  %arrayidx5.real.1 = load float, float* %arrayidx5.realp.1, align 8
-  %arrayidx5.imagp.1 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 1, i32 1
-  %arrayidx5.imag.1 = load float, float* %arrayidx5.imagp.1, align 8
+  %arrayidx5.realp.1 = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 1, i32 0
+  %arrayidx5.real.1 = load float, ptr %arrayidx5.realp.1, align 8
+  %arrayidx5.imagp.1 = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 1, i32 1
+  %arrayidx5.imag.1 = load float, ptr %arrayidx5.imagp.1, align 8
   %mul.1 = fmul fast float %arrayidx5.real.1, %arrayidx5.real.1
   %mul9.1 = fmul fast float %arrayidx5.imag.1, %arrayidx5.imag.1
   %add.1 = fadd fast float %mul9.1, %mul.1
   %conv.1 = fpext float %add.1 to double
   %add10.1 = fadd fast double %conv.1, %add10
-  %arrayidx5.realp.2 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 2, i32 0
-  %arrayidx5.real.2 = load float, float* %arrayidx5.realp.2, align 8
-  %arrayidx5.imagp.2 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 2, i32 1
-  %arrayidx5.imag.2 = load float, float* %arrayidx5.imagp.2, align 8
+  %arrayidx5.realp.2 = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 2, i32 0
+  %arrayidx5.real.2 = load float, ptr %arrayidx5.realp.2, align 8
+  %arrayidx5.imagp.2 = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 2, i32 1
+  %arrayidx5.imag.2 = load float, ptr %arrayidx5.imagp.2, align 8
   %mul.2 = fmul fast float %arrayidx5.real.2, %arrayidx5.real.2
   %mul9.2 = fmul fast float %arrayidx5.imag.2, %arrayidx5.imag.2
   %add.2 = fadd fast float %mul9.2, %mul.2
@@ -69,7 +69,7 @@ for.cond.for.end13_crit_edge:                     ; preds = %for.cond1.preheader
 
 for.end13:                                        ; preds = %for.cond.for.end13_crit_edge, %entry
   %sum.0.lcssa = phi float [ %phitmp, %for.cond.for.end13_crit_edge ], [ 0.000000e+00, %entry ]
-  store float %sum.0.lcssa, float* %r, align 4
+  store float %sum.0.lcssa, ptr %r, align 4
   ret void
 }
 

diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-altivec.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-altivec.ll
index ca5f7962d62fa..3f533f15fb83e 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-altivec.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-altivec.ll
@@ -11,7 +11,7 @@ declare float @atanhf(float) #0
 
 ; MASSV is unsupported for AltiVec.
 ; Check that massv entries are not generated.
-define void @cbrt_f64(double* nocapture %varray) {
+define void @cbrt_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @cbrt_f64(
 ; CHECK-NOT: __cbrtd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -24,8 +24,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @cbrt(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -34,7 +34,7 @@ for.end:
   ret void
 }
 
-define void @cbrt_f32(float* nocapture %varray) {
+define void @cbrt_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @cbrt_f32(
 ; CHECK-NOT: __cbrtf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -47,8 +47,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @cbrtf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -57,7 +57,7 @@ for.end:
   ret void
 }
 
-define void @atanh_f64(double* nocapture %varray) {
+define void @atanh_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @atanh_f64(
 ; CHECK-NOT: __atanhd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -70,8 +70,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @atanh(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -80,7 +80,7 @@ for.end:
   ret void
 }
 
-define void @atanh_f32(float* nocapture %varray) {
+define void @atanh_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @atanh_f32(
 ; CHECK-NOT: __atanhf4{{.*}}<2 x double>
 ; CHECK: ret void
@@ -93,8 +93,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @atanhf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
index d79724a483f0c..c2871dd6eeb22 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
@@ -86,7 +86,7 @@ declare float @acoshf(float) #0
 declare double @atanh(double) #0
 declare float @atanhf(float) #0
 
-define void @cbrt_f64(double* nocapture %varray) {
+define void @cbrt_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @cbrt_f64(
 ; CHECK: __cbrtd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -99,8 +99,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @cbrt(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -109,7 +109,7 @@ for.end:
   ret void
 }
 
-define void @cbrt_f32(float* nocapture %varray) {
+define void @cbrt_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @cbrt_f32(
 ; CHECK: __cbrtf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -122,8 +122,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @cbrtf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -132,7 +132,7 @@ for.end:
   ret void
 }
 
-define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
+define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f64(
 ; CHECK:  __powd2{{.*}}<2 x double>
 ; CHECK:  ret void
@@ -144,11 +144,11 @@ for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
-  %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv
-  %tmp1 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
+  %tmp1 = load double, ptr %arrayidx, align 4
   %tmp2 = tail call double @pow(double %conv, double %tmp1)
-  %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %tmp2, double* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %tmp2, ptr %arrayidx2, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -157,7 +157,7 @@ for.end:
   ret void
 }
 
-define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) {
+define void @pow_f64_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f64_intrinsic(
 ; CHECK: __powd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -169,11 +169,11 @@ for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
-  %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv
-  %tmp1 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
+  %tmp1 = load double, ptr %arrayidx, align 4
   %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1)
-  %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %tmp2, double* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %tmp2, ptr %arrayidx2, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -182,7 +182,7 @@ for.end:
   ret void
 }
 
-define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
+define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32(
 ; CHECK: __powf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -194,11 +194,11 @@ for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
-  %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv
-  %tmp1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
+  %tmp1 = load float, ptr %arrayidx, align 4
   %tmp2 = tail call float @powf(float %conv, float %tmp1)
-  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %tmp2, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %tmp2, ptr %arrayidx2, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -207,7 +207,7 @@ for.end:
   ret void
 }
 
-define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) {
+define void @pow_f32_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32_intrinsic(
 ; CHECK: __powf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -219,11 +219,11 @@ for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
-  %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv
-  %tmp1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
+  %tmp1 = load float, ptr %arrayidx, align 4
   %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1)
-  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %tmp2, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %tmp2, ptr %arrayidx2, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -232,7 +232,7 @@ for.end:
   ret void
 }
 
-define void @sqrt_f64(double* nocapture %varray) {
+define void @sqrt_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @sqrt_f64(
 ; CHECK-NOT: __sqrtd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -245,8 +245,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @sqrt(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -255,7 +255,7 @@ for.end:
   ret void
 }
 
-define void @sqrt_f32(float* nocapture %varray) {
+define void @sqrt_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @sqrt_f32(
 ; CHECK-NOT: __sqrtf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -268,8 +268,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @sqrtf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -278,7 +278,7 @@ for.end:
   ret void
 }
 
-define void @exp_f64(double* nocapture %varray) {
+define void @exp_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f64(
 ; CHECK: __expd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -291,8 +291,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @exp(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -301,7 +301,7 @@ for.end:
   ret void
 }
 
-define void @exp_f64_intrinsic(double* nocapture %varray) {
+define void @exp_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f64_intrinsic(
 ; CHECK: __expd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -314,8 +314,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.exp.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -324,7 +324,7 @@ for.end:
   ret void
 }
 
-define void @exp_f32(float* nocapture %varray) {
+define void @exp_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32(
 ; CHECK: __expf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -337,8 +337,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @expf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -347,7 +347,7 @@ for.end:
   ret void
 }
 
-define void @exp_f32_intrinsic(float* nocapture %varray) {
+define void @exp_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32_intrinsic(
 ; CHECK: __expf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -360,8 +360,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.exp.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -370,7 +370,7 @@ for.end:
   ret void
 }
 
-define void @exp2_f64(double* nocapture %varray) {
+define void @exp2_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f64(
 ; CHECK: __exp2d2{{.*}}<2 x double>
 ; CHECK:  ret void
@@ -383,8 +383,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @exp2(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -393,7 +393,7 @@ for.end:
   ret void
 }
 
-define void @exp2_f64_intrinsic(double* nocapture %varray) {
+define void @exp2_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f64_intrinsic(
 ; CHECK: __exp2d2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -406,8 +406,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.exp2.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -416,7 +416,7 @@ for.end:
   ret void
 }
 
-define void @exp2_f32(float* nocapture %varray) {
+define void @exp2_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f32(
 ; CHECK: __exp2f4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -429,8 +429,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @exp2f(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -439,7 +439,7 @@ for.end:
   ret void
 }
 
-define void @exp2_f32_intrinsic(float* nocapture %varray) {
+define void @exp2_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f32_intrinsic(
 ; CHECK: __exp2f4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -452,8 +452,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.exp2.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -462,7 +462,7 @@ for.end:
   ret void
 }
 
-define void @expm1_f64(double* nocapture %varray) {
+define void @expm1_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @expm1_f64(
 ; CHECK: __expm1d2{{.*}}<2 x double>
 ; CHECK:  ret void
@@ -475,8 +475,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @expm1(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -485,7 +485,7 @@ for.end:
   ret void
 }
 
-define void @expm1_f32(float* nocapture %varray) {
+define void @expm1_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @expm1_f32(
 ; CHECK: __expm1f4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -498,8 +498,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @expm1f(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -508,7 +508,7 @@ for.end:
   ret void
 }
 
-define void @log_f64(double* nocapture %varray) {
+define void @log_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f64(
 ; CHECK: __logd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -521,8 +521,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @log(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -531,7 +531,7 @@ for.end:
   ret void
 }
 
-define void @log_f64_intrinsic(double* nocapture %varray) {
+define void @log_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f64_intrinsic(
 ; CHECK: __logd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -544,8 +544,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.log.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -554,7 +554,7 @@ for.end:
   ret void
 }
 
-define void @log_f32(float* nocapture %varray) {
+define void @log_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32(
 ; CHECK: __logf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -567,8 +567,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @logf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -577,7 +577,7 @@ for.end:
   ret void
 }
 
-define void @log_f32_intrinsic(float* nocapture %varray) {
+define void @log_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32_intrinsic(
 ; CHECK: __logf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -590,8 +590,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.log.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -600,7 +600,7 @@ for.end:
   ret void
 }
 
-define void @log1p_f64(double* nocapture %varray) {
+define void @log1p_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log1p_f64(
 ; CHECK: __log1pd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -613,8 +613,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @log1p(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -623,7 +623,7 @@ for.end:
   ret void
 }
 
-define void @log1p_f32(float* nocapture %varray) {
+define void @log1p_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log1p_f32(
 ; CHECK: __log1pf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -636,8 +636,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @log1pf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -646,7 +646,7 @@ for.end:
   ret void
 }
 
-define void @log10_f64(double* nocapture %varray) {
+define void @log10_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f64(
 ; CHECK: __log10d2(<2 x double>
 ; CHECK: ret void
@@ -659,8 +659,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @log10(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -669,7 +669,7 @@ for.end:
   ret void
 }
 
-define void @log10_f64_intrinsic(double* nocapture %varray) {
+define void @log10_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f64_intrinsic(
 ; CHECK: __log10d2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -682,8 +682,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.log10.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -692,7 +692,7 @@ for.end:
   ret void
 }
 
-define void @log10_f32(float* nocapture %varray) {
+define void @log10_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f32(
 ; CHECK: __log10f4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -705,8 +705,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @log10f(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -715,7 +715,7 @@ for.end:
   ret void
 }
 
-define void @log10_f32_intrinsic(float* nocapture %varray) {
+define void @log10_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f32_intrinsic(
 ; CHECK: __log10f4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -728,8 +728,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.log10.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -738,7 +738,7 @@ for.end:
   ret void
 }
 
-define void @log2_f64(double* nocapture %varray) {
+define void @log2_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f64(
 ; CHECK: __log2d2(<2 x double>
 ; CHECK: ret void
@@ -751,8 +751,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @log2(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -761,7 +761,7 @@ for.end:
   ret void
 }
 
-define void @log2_f64_intrinsic(double* nocapture %varray) {
+define void @log2_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f64_intrinsic(
 ; CHECK: __log2d2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -774,8 +774,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.log2.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -784,7 +784,7 @@ for.end:
   ret void
 }
 
-define void @log2_f32(float* nocapture %varray) {
+define void @log2_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f32(
 ; CHECK: __log2f4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -797,8 +797,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @log2f(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -807,7 +807,7 @@ for.end:
   ret void
 }
 
-define void @log2_f32_intrinsic(float* nocapture %varray) {
+define void @log2_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f32_intrinsic(
 ; CHECK: __log2f4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -820,8 +820,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.log2.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -830,7 +830,7 @@ for.end:
   ret void
 }
 
-define void @sin_f64(double* nocapture %varray) {
+define void @sin_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f64(
 ; CHECK: __sind2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -843,8 +843,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @sin(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -853,7 +853,7 @@ for.end:
   ret void
 }
 
-define void @sin_f64_intrinsic(double* nocapture %varray) {
+define void @sin_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f64_intrinsic(
 ; CHECK: __sind2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -866,8 +866,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.sin.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -876,7 +876,7 @@ for.end:
   ret void
 }
 
-define void @sin_f32(float* nocapture %varray) {
+define void @sin_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32(
 ; CHECK: __sinf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -889,8 +889,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @sinf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -899,7 +899,7 @@ for.end:
   ret void
 }
 
-define void @sin_f32_intrinsic(float* nocapture %varray) {
+define void @sin_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32_intrinsic(
 ; CHECK: __sinf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -912,8 +912,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.sin.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -922,7 +922,7 @@ for.end:
   ret void
 }
 
-define void @cos_f64(double* nocapture %varray) {
+define void @cos_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f64(
 ; CHECK: __cosd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -935,8 +935,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @cos(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -945,7 +945,7 @@ for.end:
   ret void
 }
 
-define void @cos_f64_intrinsic(double* nocapture %varray) {
+define void @cos_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f64_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <2 x double> @__cosd2(<2 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -958,8 +958,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.cos.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -968,7 +968,7 @@ for.end:
   ret void
 }
 
-define void @cos_f32(float* nocapture %varray) {
+define void @cos_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32(
 ; CHECK: __cosf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -981,8 +981,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @cosf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -991,7 +991,7 @@ for.end:
   ret void
 }
 
-define void @cos_f32_intrinsic(float* nocapture %varray) {
+define void @cos_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32_intrinsic(
 ; CHECK: __cosf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1004,8 +1004,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.cos.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1014,7 +1014,7 @@ for.end:
   ret void
 }
 
-define void @tan_f64(double* nocapture %varray) {
+define void @tan_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @tan_f64(
 ; CHECK: __tand2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -1027,8 +1027,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @tan(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1037,7 +1037,7 @@ for.end:
   ret void
 }
 
-define void @tan_f32(float* nocapture %varray) {
+define void @tan_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @tan_f32(
 ; CHECK: __tanf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1050,8 +1050,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @tanf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1060,7 +1060,7 @@ for.end:
   ret void
 }
 
-define void @asin_f64(double* nocapture %varray) {
+define void @asin_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @asin_f64(
 ; CHECK: __asind2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -1073,8 +1073,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @asin(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1083,7 +1083,7 @@ for.end:
   ret void
 }
 
-define void @asin_f32(float* nocapture %varray) {
+define void @asin_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @asin_f32(
 ; CHECK: __asinf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1096,8 +1096,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @asinf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1106,7 +1106,7 @@ for.end:
   ret void
 }
 
-define void @acos_f64(double* nocapture %varray) {
+define void @acos_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @acos_f64(
 ; CHECK: __acosd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -1119,8 +1119,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @acos(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1129,7 +1129,7 @@ for.end:
   ret void
 }
 
-define void @acos_f32(float* nocapture %varray) {
+define void @acos_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @acos_f32(
 ; CHECK: __acosf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1142,8 +1142,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @acosf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1152,7 +1152,7 @@ for.end:
   ret void
 }
 
-define void @atan_f64(double* nocapture %varray) {
+define void @atan_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @atan_f64(
 ; CHECK: __atand2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -1165,8 +1165,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @atan(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1175,7 +1175,7 @@ for.end:
   ret void
 }
 
-define void @atan_f32(float* nocapture %varray) {
+define void @atan_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @atan_f32(
 ; CHECK: __atanf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1188,8 +1188,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @atanf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1198,7 +1198,7 @@ for.end:
   ret void
 }
 
-define void @atan2_f64(double* nocapture %varray) {
+define void @atan2_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @atan2_f64(
 ; CHECK: __atan2d2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -1211,8 +1211,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @atan2(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1221,7 +1221,7 @@ for.end:
   ret void
 }
 
-define void @atan2_f32(float* nocapture %varray) {
+define void @atan2_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @atan2_f32(
 ; CHECK: __atan2f4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1234,8 +1234,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @atan2f(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1244,7 +1244,7 @@ for.end:
   ret void
 }
 
-define void @sinh_f64(double* nocapture %varray) {
+define void @sinh_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @sinh_f64(
 ; CHECK: __sinhd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -1257,8 +1257,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @sinh(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1267,7 +1267,7 @@ for.end:
   ret void
 }
 
-define void @sinh_f32(float* nocapture %varray) {
+define void @sinh_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @sinh_f32(
 ; CHECK: __sinhf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1280,8 +1280,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @sinhf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1290,7 +1290,7 @@ for.end:
   ret void
 }
 
-define void @cosh_f64(double* nocapture %varray) {
+define void @cosh_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @cosh_f64(
 ; CHECK: __coshd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -1303,8 +1303,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @cosh(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1313,7 +1313,7 @@ for.end:
   ret void
 }
 
-define void @cosh_f32(float* nocapture %varray) {
+define void @cosh_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @cosh_f32(
 ; CHECK: __coshf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1326,8 +1326,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @coshf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1336,7 +1336,7 @@ for.end:
   ret void
 }
 
-define void @tanh_f64(double* nocapture %varray) {
+define void @tanh_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @tanh_f64(
 ; CHECK: __tanhd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -1349,8 +1349,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @tanh(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1359,7 +1359,7 @@ for.end:
   ret void
 }
 
-define void @tanh_f32(float* nocapture %varray) {
+define void @tanh_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @tanh_f32(
 ; CHECK: __tanhf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1372,8 +1372,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @tanhf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1382,7 +1382,7 @@ for.end:
   ret void
 }
 
-define void @asinh_f64(double* nocapture %varray) {
+define void @asinh_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @asinh_f64(
 ; CHECK: __asinhd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -1395,8 +1395,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @asinh(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1405,7 +1405,7 @@ for.end:
   ret void
 }
 
-define void @asinh_f32(float* nocapture %varray) {
+define void @asinh_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @asinh_f32(
 ; CHECK: __asinhf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1418,8 +1418,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @asinhf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1428,7 +1428,7 @@ for.end:
   ret void
 }
 
-define void @acosh_f64(double* nocapture %varray) {
+define void @acosh_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @acosh_f64(
 ; CHECK: __acoshd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -1441,8 +1441,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @acosh(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1451,7 +1451,7 @@ for.end:
   ret void
 }
 
-define void @acosh_f32(float* nocapture %varray) {
+define void @acosh_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @acosh_f32(
 ; CHECK: __acoshf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1464,8 +1464,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @acoshf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1474,7 +1474,7 @@ for.end:
   ret void
 }
 
-define void @atanh_f64(double* nocapture %varray) {
+define void @atanh_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @atanh_f64(
 ; CHECK: __atanhd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -1487,8 +1487,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @atanh(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -1497,7 +1497,7 @@ for.end:
   ret void
 }
 
-define void @atanh_f32(float* nocapture %varray) {
+define void @atanh_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @atanh_f32(
 ; CHECK: __atanhf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -1510,8 +1510,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @atanhf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-nobuiltin.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-nobuiltin.ll
index de69c620b9cd8..28466fbc2b44c 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-nobuiltin.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-nobuiltin.ll
@@ -7,7 +7,7 @@ declare double @atanh(double) #1
 declare float @atanhf(float) #1
 
 ; Check that functions marked as nobuiltin are not lowered to massv entries.
-define void @atanh_f64(double* nocapture %varray) {
+define void @atanh_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @atanh_f64(
 ; CHECK-NOT: __atanhd2{{.*}}<2 x double>
 ; CHECK: ret void
@@ -20,8 +20,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @atanh(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -30,7 +30,7 @@ for.end:
   ret void
 }
 
-define void @atanh_f32(float* nocapture %varray) {
+define void @atanh_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @atanh_f32(
 ; CHECK-NOT: __atanhf4{{.*}}<4 x float>
 ; CHECK: ret void
@@ -43,8 +43,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @atanhf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-unsupported.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-unsupported.ll
index 9502dd089ed22..dde36970f9b95 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-unsupported.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-unsupported.ll
@@ -10,7 +10,7 @@ declare double @llvm.sqrt.f64(double) #0
 declare float @llvm.sqrt.f32(float) #0
 
 ; Vector counterpart of ceil is unsupported in MASSV library.
-define void @ceil_f64(double* nocapture %varray) {
+define void @ceil_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @ceil_f64(
 ; CHECK-NOT: __ceild2_massv{{.*}}<2 x double>
 ; CHECK-NOT: __ceild2_P8{{.*}}<2 x double>
@@ -25,8 +25,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @ceil(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -36,7 +36,7 @@ for.end:
 }
 
 ; Vector counterpart of fabs is unsupported in MASSV library.
-define void @fabs_f32(float* nocapture %varray) {
+define void @fabs_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @fabs_f32(
 ; CHECK-NOT: __fabsf4_massv{{.*}}<4 x float>
 ; CHECK-NOT: __fabsf4_P8{{.*}}<4 x float>
@@ -51,8 +51,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @fabsf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -63,7 +63,7 @@ for.end:
 
 ; sqrt intrinsics are converted to their vector counterpart intrinsics.
 ; They are not lowered to MASSV entries.
-define void @sqrt_f64_intrinsic(double* nocapture %varray) {
+define void @sqrt_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sqrt_f64_intrinsic(
 ; CHECK: llvm.sqrt.v2f64{{.*}}<2 x double>
 ; CHECK: ret void
@@ -76,8 +76,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.sqrt.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -86,7 +86,7 @@ for.end:
   ret void
 }
 
-define void @sqrt_f32_intrinsic(float* nocapture %varray) {
+define void @sqrt_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sqrt_f32_intrinsic(
 ; CHECK: llvm.sqrt.v4f32{{.*}}<4 x float>
 ; CHECK: ret void
@@ -99,8 +99,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.sqrt.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization-profitability.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization-profitability.ll
index 88cfa5bd40c54..b88254e7b678d 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization-profitability.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization-profitability.ll
@@ -18,7 +18,7 @@ target triple = "powerpc64le-unknown-linux-gnu"
 ; CHECK-NOT: vec.epilog.middle.block
 ; CHECK: ret void
 
-define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) #0 {
+define dso_local void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 signext %N) #0 {
 entry:
   %cmp1 = icmp sgt i32 %N, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
@@ -29,13 +29,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %bb, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %cc, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %add = fadd fast float %0, %1
-  %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
-  store float %add, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %aa, i64 %indvars.iv
+  store float %add, ptr %arrayidx4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.body, label %for.end.loopexit
@@ -56,7 +56,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; CHECK-NOT: vec.epilog.middle.block
 ; CHECK: ret void
 
-define dso_local void @f2(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) #1 {
+define dso_local void @f2(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 signext %N) #1 {
 entry:
   %cmp1 = icmp sgt i32 %N, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
@@ -67,13 +67,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %bb, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %cc, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %add = fadd fast float %0, %1
-  %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
-  store float %add, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %aa, i64 %indvars.iv
+  store float %add, ptr %arrayidx4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.body, label %for.end.loopexit
@@ -104,7 +104,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; CHECK-MIN-4: vec.epilog.middle.block
 ; CHECK-MIN-4: ret void
 
-define dso_local void @f3(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) {
+define dso_local void @f3(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 signext %N) {
 entry:
   %cmp1 = icmp sgt i32 %N, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
@@ -115,13 +115,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %bb, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %cc, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %add = fadd fast float %0, %1
-  %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
-  store float %add, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %aa, i64 %indvars.iv
+  store float %add, ptr %arrayidx4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.body, label %for.end.loopexit

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/pr41179.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/pr41179.ll
index 6267f7459cc4a..141a1533db178 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/pr41179.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/pr41179.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -passes=loop-vectorize -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
 
-define void @foo(i8* %start, i8* %end) {
+define void @foo(ptr %start, ptr %end) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[START2:%.*]] = ptrtoint i8* [[START:%.*]] to i64
-; CHECK-NEXT:    [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i64
+; CHECK-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START:%.*]] to i64
+; CHECK-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END:%.*]] to i64
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[END1]] to i32
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[END1]], -1
 ; CHECK-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[START2]], i64 [[TMP1]])
@@ -25,10 +25,10 @@ define void @foo(i8* %start, i8* %end) {
 ; CHECK-NEXT:    [[INDUCTION3:%.*]] = add i32 [[OFFSET_IDX]], -1
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 -1, [[INDUCTION]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 -1, [[INDUCTION3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[END]], i32 [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[END]], i32 [[TMP5]]
-; CHECK-NEXT:    store i8 0, i8* [[TMP6]], align 1
-; CHECK-NEXT:    store i8 0, i8* [[TMP7]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[END]], i32 [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[END]], i32 [[TMP5]]
+; CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+; CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -41,9 +41,9 @@ define void @foo(i8* %start, i8* %end) {
 ; CHECK:       while.body:
 ; CHECK-NEXT:    [[COUNT_09:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[ADD]] = add nsw i32 -1, [[COUNT_09]]
-; CHECK-NEXT:    [[G:%.*]] = getelementptr i8, i8* [[END]], i32 [[ADD]]
-; CHECK-NEXT:    store i8 0, i8* [[G]], align 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8* [[START]], [[G]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr i8, ptr [[END]], i32 [[ADD]]
+; CHECK-NEXT:    store i8 0, ptr [[G]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult ptr [[START]], [[G]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       while.end.loopexit:
 ; CHECK-NEXT:    ret void
@@ -54,9 +54,9 @@ entry:
 while.body:                                       ; preds = %while.body, %entry
   %count.09 = phi i32 [ %add, %while.body ], [ 0, %entry ]
   %add = add nsw i32 -1, %count.09
-  %G = getelementptr i8, i8* %end, i32 %add
-  store i8 0, i8* %G
-  %cmp = icmp ult i8* %start, %G
+  %G = getelementptr i8, ptr %end, i32 %add
+  store i8 0, ptr %G
+  %cmp = icmp ult ptr %start, %G
   br i1 %cmp, label %while.body, label %while.end.loopexit
 
 while.end.loopexit:                               ; preds = %while.body

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
index 9823cf97e0d32..a567045c53c6e 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
@@ -19,7 +19,7 @@
 target datalayout = "e-m:e-i64:64-n32:64"
 target triple = "powerpc64le-ibm-linux-gnu"
 
-define void @test(double* nocapture readonly %arr, i32 signext %len) #0 {
+define void @test(ptr nocapture readonly %arr, i32 signext %len) #0 {
 entry:
   %cmp4 = icmp sgt i32 %len, 0
   br i1 %cmp4, label %for.body.lr.ph, label %for.end
@@ -31,8 +31,8 @@ for.body.lr.ph:                                   ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %redx.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %arr, i64 %indvars.iv
-  %1 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %arr, i64 %indvars.iv
+  %1 = load double, ptr %arrayidx, align 8
   %add = fadd fast double %1, %redx.05
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv to i32

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
index 5c82e200ab67f..e10acf43ff8a9 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
@@ -3,7 +3,7 @@ target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
 ; Function Attrs: nounwind
-define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
+define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) #0 {
 entry:
   br label %for.body
 
@@ -18,15 +18,15 @@ for.body:                                         ; preds = %for.body, %entry
   %0 = shl nsw i64 %indvars.iv, 1
   %odd.idx = add nsw i64 %0, 1
 
-  %arrayidx = getelementptr inbounds double, double* %b, i64 %0
-  %arrayidx.odd = getelementptr inbounds double, double* %b, i64 %odd.idx
+  %arrayidx = getelementptr inbounds double, ptr %b, i64 %0
+  %arrayidx.odd = getelementptr inbounds double, ptr %b, i64 %odd.idx
 
-  %1 = load double, double* %arrayidx, align 8
-  %2 = load double, double* %arrayidx.odd, align 8
+  %1 = load double, ptr %arrayidx, align 8
+  %2 = load double, ptr %arrayidx.odd, align 8
 
   %add = fadd double %1, %2
-  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
-  store double %add, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+  store double %add, ptr %arrayidx2, align 8
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1600
   br i1 %exitcond, label %for.cond.cleanup, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll
index dae0e5460e3fd..1bacb57700df8 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll
@@ -10,50 +10,46 @@ define zeroext i32 @test() #0 {
 entry:
   %a = alloca [1600 x i32], align 4
   %c = alloca [1600 x i32], align 4
-  %0 = bitcast [1600 x i32]* %a to i8*
-  call void @llvm.lifetime.start(i64 6400, i8* %0) #3
+  call void @llvm.lifetime.start(i64 6400, ptr %a) #3
   br label %for.body
 
 for.cond.cleanup:                                 ; preds = %for.body
-  %1 = bitcast [1600 x i32]* %c to i8*
-  call void @llvm.lifetime.start(i64 6400, i8* %1) #3
-  %arraydecay = getelementptr inbounds [1600 x i32], [1600 x i32]* %a, i64 0, i64 0
-  %arraydecay1 = getelementptr inbounds [1600 x i32], [1600 x i32]* %c, i64 0, i64 0
-  %call = call signext i32 @bar(i32* %arraydecay, i32* %arraydecay1) #3
+  call void @llvm.lifetime.start(i64 6400, ptr %c) #3
+  %call = call signext i32 @bar(ptr %a, ptr %c) #3
   br label %for.body6
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv25 = phi i64 [ 0, %entry ], [ %indvars.iv.next26, %for.body ]
-  %arrayidx = getelementptr inbounds [1600 x i32], [1600 x i32]* %a, i64 0, i64 %indvars.iv25
-  %2 = trunc i64 %indvars.iv25 to i32
-  store i32 %2, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1600 x i32], ptr %a, i64 0, i64 %indvars.iv25
+  %0 = trunc i64 %indvars.iv25 to i32
+  store i32 %0, ptr %arrayidx, align 4
   %indvars.iv.next26 = add nuw nsw i64 %indvars.iv25, 1
   %exitcond27 = icmp eq i64 %indvars.iv.next26, 1600
   br i1 %exitcond27, label %for.cond.cleanup, label %for.body
 
 for.cond.cleanup5:                                ; preds = %for.body6
-  call void @llvm.lifetime.end(i64 6400, i8* nonnull %1) #3
-  call void @llvm.lifetime.end(i64 6400, i8* %0) #3
+  call void @llvm.lifetime.end(i64 6400, ptr nonnull %c) #3
+  call void @llvm.lifetime.end(i64 6400, ptr %a) #3
   ret i32 %add
 
 for.body6:                                        ; preds = %for.body6, %for.cond.cleanup
   %indvars.iv = phi i64 [ 0, %for.cond.cleanup ], [ %indvars.iv.next, %for.body6 ]
   %s.022 = phi i32 [ 0, %for.cond.cleanup ], [ %add, %for.body6 ]
-  %arrayidx8 = getelementptr inbounds [1600 x i32], [1600 x i32]* %c, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %arrayidx8, align 4
-  %add = add i32 %3, %s.022
+  %arrayidx8 = getelementptr inbounds [1600 x i32], ptr %c, i64 0, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx8, align 4
+  %add = add i32 %1, %s.022
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1600
   br i1 %exitcond, label %for.cond.cleanup5, label %for.body6
 }
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+declare void @llvm.lifetime.start(i64, ptr nocapture) #1
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+declare void @llvm.lifetime.end(i64, ptr nocapture) #1
 
-declare signext i32 @bar(i32*, i32*) #2
+declare signext i32 @bar(ptr, ptr) #2
 
 attributes #0 = { nounwind "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" }
 attributes #1 = { argmemonly nounwind }

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
index 28c105ec6f525..41d9fe0407ef3 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
@@ -9,7 +9,7 @@ target triple = "powerpc64-unknown-linux-gnu"
 
 define signext i32 @s173() #0 {
 entry:
-  %0 = load i32, i32* @ntimes, align 4
+  %0 = load i32, ptr @ntimes, align 4
   %cmp21 = icmp sgt i32 %0, 0
   br i1 %cmp21, label %for.cond1.preheader, label %for.end12
 
@@ -19,21 +19,21 @@ for.cond1.preheader:                              ; preds = %for.end, %entry
 
 for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
   %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
-  %arrayidx = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv
-  %1 = load float, float* %arrayidx, align 4
-  %arrayidx5 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv
-  %2 = load float, float* %arrayidx5, align 4
+  %arrayidx = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx, align 4
+  %arrayidx5 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 3, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx5, align 4
   %add = fadd float %1, %2
   %3 = add nsw i64 %indvars.iv, 16000
-  %arrayidx8 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3
-  store float %add, float* %arrayidx8, align 4
+  %arrayidx8 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %3
+  store float %add, ptr %arrayidx8, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 16000
   br i1 %exitcond, label %for.end, label %for.body3
 
 for.end:                                          ; preds = %for.body3
   %inc11 = add nsw i32 %nl.022, 1
-  %4 = load i32, i32* @ntimes, align 4
+  %4 = load i32, ptr @ntimes, align 4
   %mul = mul nsw i32 %4, 10
   %cmp = icmp slt i32 %inc11, %mul
   br i1 %cmp, label %for.cond1.preheader, label %for.end12
@@ -42,7 +42,7 @@ for.end12:                                        ; preds = %for.end, %entry
   ret i32 0
 
 ; CHECK-LABEL: @s173
-; CHECK: load <4 x float>, <4 x float>*
+; CHECK: load <4 x float>, ptr
 ; CHECK: add nsw i64 %index, 16000
 ; CHECK: ret i32 0
 }

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll
index a53c2bdb8e3a3..64eb35ebe8534 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll
@@ -3,7 +3,7 @@
 ; RUN:   -vectorizer-maximize-bandwidth -passes='default<O2>,inject-tli-mappings,loop-vectorize' \
 ; RUN:   -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s
 
-define dso_local double @test(float* %Arr) {
+define dso_local double @test(ptr %Arr) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -11,9 +11,8 @@ define dso_local double @test(float* %Arr) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[ARR:%.*]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <2 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[ARR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
 ; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast <2 x double> @__sind2(<2 x double> [[TMP3]])
 ; CHECK-NEXT:    [[TMP5]] = fadd fast <2 x double> [[TMP4]], [[VEC_PHI]]
@@ -39,8 +38,8 @@ for.cond.cleanup:
 
 for.body:
   %idxprom = sext i32 %i.0 to i64
-  %arrayidx = getelementptr inbounds float, float* %Arr, i64 %idxprom
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %Arr, i64 %idxprom
+  %0 = load float, ptr %arrayidx, align 4
   %conv = fpext float %0 to double
   %1 = call fast double @llvm.sin.f64(double %conv)
   %add = fadd fast double %Sum.0, %1

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll
index 4895943831e07..aafe849b7042a 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -vectorizer-maximize-bandwidth -mtriple=powerpc64le-- -S \
 ; RUN: -passes=loop-simplify,loop-rotate,loop-vectorize,instcombine,simplifycfg -simplifycfg-require-and-preserve-domtree=1 -force-vector-interleave=1 < %s | FileCheck %s
-define dso_local double @test(float* %Arr) {
+define dso_local double @test(ptr %Arr) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -9,9 +9,8 @@ define dso_local double @test(float* %Arr) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[ARR:%.*]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <2 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[ARR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast <2 x double> @__sind2_P8(<2 x double> [[TMP3]])
 ; CHECK-NEXT:    [[TMP5]] = fadd fast <2 x double> [[VEC_PHI]], [[TMP4]]
@@ -36,8 +35,8 @@ for.cond.cleanup:
 
 for.body:
   %idxprom = sext i32 %i.0 to i64
-  %arrayidx = getelementptr inbounds float, float* %Arr, i64 %idxprom
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %Arr, i64 %idxprom
+  %0 = load float, ptr %arrayidx, align 4
   %conv = fpext float %0 to double
   %1 = call fast double @llvm.sin.f64(double %conv) #1
   %add = fadd fast double %Sum.0, %1

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll
index 2bbaf9d59ff26..d51feae33e72e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll
@@ -9,7 +9,7 @@
 ; CHECK: %{{.*}} = add {{.*}}, 8
 
 ; Function Attrs: nofree norecurse nosync nounwind writeonly
-define dso_local void @foo(i32 signext %n, i32* nocapture %A) local_unnamed_addr #0 {
+define dso_local void @foo(i32 signext %n, ptr nocapture %A) local_unnamed_addr #0 {
 entry:
   %cmp5 = icmp sgt i32 %n, 0
   br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
@@ -26,9 +26,9 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
   %0 = trunc i64 %indvars.iv to i32
-  store i32 %0, i32* %arrayidx, align 4, !tbaa !4
+  store i32 %0, ptr %arrayidx, align 4, !tbaa !4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !8

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
index 7f1603151220b..769fc02a1bef2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
@@ -10,7 +10,7 @@
 ; ADD
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @add(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @add(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @add
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -26,8 +26,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi i32 [ 2, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add nsw i32 %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -40,7 +40,7 @@ for.end:                                 ; preds = %for.body, %entry
 ; OR
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @or(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @or
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -56,8 +56,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi i32 [ 2, %entry ], [ %or, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %or = or i32 %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -70,7 +70,7 @@ for.end:                                 ; preds = %for.body, %entry
 ; AND
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @and(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @and
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -86,8 +86,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi i32 [ 2, %entry ], [ %and, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %and = and i32 %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -100,7 +100,7 @@ for.end:                                 ; preds = %for.body, %entry
 ; XOR
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @xor(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @xor
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -116,8 +116,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi i32 [ 2, %entry ], [ %xor, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %xor = xor i32 %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -130,7 +130,7 @@ for.end:                                 ; preds = %for.body, %entry
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
 ; SMIN
 
-define i32 @smin(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @smin
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -149,8 +149,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp.i = icmp slt i32 %0, %sum.010
   %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010
   %iv.next = add nuw nsw i64 %iv, 1
@@ -164,7 +164,7 @@ for.end:
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
 ; UMAX
 
-define i32 @umax(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @umax
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
@@ -183,8 +183,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp.i = icmp ugt i32 %0, %sum.010
   %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010
   %iv.next = add nuw nsw i64 %iv, 1
@@ -198,7 +198,7 @@ for.end:
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
 ; FADD (FAST)
 
-define float @fadd_fast(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-LABEL: @fadd_fast
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
@@ -214,8 +214,8 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
   %add = fadd fast float %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -227,7 +227,7 @@ for.end:
 
 ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
 ; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2)
-define bfloat @fadd_fast_bfloat(bfloat* noalias nocapture readonly %a, i64 %n) {
+define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-LABEL: @fadd_fast_bfloat
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <8 x bfloat>
@@ -243,8 +243,8 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds bfloat, bfloat* %a, i64 %iv
-  %0 = load bfloat, bfloat* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
+  %0 = load bfloat, ptr %arrayidx, align 4
   %add = fadd fast bfloat %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -257,7 +257,7 @@ for.end:
 ; FMIN (FAST)
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define float @fmin_fast(float* noalias nocapture readonly %a, i64 %n) #0 {
+define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-LABEL: @fmin_fast
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
@@ -276,8 +276,8 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp.i = fcmp olt float %0, %sum.07
   %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
@@ -291,7 +291,7 @@ for.end:
 ; FMAX (FAST)
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define float @fmax_fast(float* noalias nocapture readonly %a, i64 %n) #0 {
+define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-LABEL: @fmax_fast
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
@@ -310,8 +310,8 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp.i = fcmp fast ogt float %0, %sum.07
   %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
@@ -328,7 +328,7 @@ for.end:
 
 ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
 ; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2)
-define i32 @mul(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
+define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @mul
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <4 x i32>
@@ -344,8 +344,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi i32 [ 2, %entry ], [ %mul, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %mul = mul nsw i32 %0, %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -358,7 +358,7 @@ for.end:                                 ; preds = %for.body, %entry
 ; Note: This test was added to ensure we always check the legality of reductions (and emit a warning if necessary) before checking for memory dependencies
 ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
 ; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2)
-define i32 @memory_dependence(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) {
+define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @memory_dependence
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <4 x i32>
@@ -378,14 +378,14 @@ entry:
 for.body:
   %i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %sum = phi i32 [ %mul, %for.body ], [ 2, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %i
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %i
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %i
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %0
   %add2 = add nuw nsw i64 %i, 32
-  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %add2
-  store i32 %add, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %add2
+  store i32 %add, ptr %arrayidx3, align 4
   %mul = mul nsw i32 %1, %sum
   %inc = add nuw nsw i64 %i, 1
   %exitcond.not = icmp eq i64 %inc, %n
@@ -396,7 +396,7 @@ for.end:
 }
 
 ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 2, interleaved count: 2)
-define float @fmuladd(float* %a, float* %b, i64 %n) {
+define float @fmuladd(ptr %a, ptr %b, i64 %n) {
 ; CHECK-LABEL: @fmuladd(
 ; CHECK: vector.body:
 ; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 2 x float>
@@ -415,10 +415,10 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %b, i64 %iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv
+  %1 = load float, ptr %arrayidx2, align 4
   %muladd = tail call reassoc float @llvm.fmuladd.f32(float %0, float %1, float %sum.07)
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
index b4ad393272fdb..3586f78d54983 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
@@ -10,20 +10,20 @@
 ; }
 ;
 ; CHECK: <vscale x 2 x i32>
-define void @test(i32* %a, i32* %b) {
+define void @test(ptr %a, ptr %b) {
 entry:
   br label %loop
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %2 = add nuw nsw i64 %iv, 64
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !6

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll
index f41acc67e2f8c..53a809ddfd483 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll
@@ -3,7 +3,7 @@
 
 ; Make sure we don't unroll scalar loops in the loop vectorizer.
 ;
-define void @small_loop(i32* nocapture %inArray, i32 %size) nounwind {
+define void @small_loop(ptr nocapture %inArray, i32 %size) nounwind {
 ; CHECK-LABEL: @small_loop(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
@@ -12,10 +12,10 @@ define void @small_loop(i32* nocapture %inArray, i32 %size) nounwind {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV1:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[INARRAY:%.*]], i32 [[IV]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[INARRAY:%.*]], i32 [[IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i32 [[TMP2]], 6
-; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[IV1]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[IV1]], [[SIZE]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP]]
@@ -30,10 +30,10 @@ entry:
 
 loop:                                          ; preds = %entry, %loop
   %iv = phi i32 [ %iv1, %loop ], [ 0, %entry ]
-  %1 = getelementptr inbounds i32, i32* %inArray, i32 %iv
-  %2 = load i32, i32* %1, align 4
+  %1 = getelementptr inbounds i32, ptr %inArray, i32 %iv
+  %2 = load i32, ptr %1, align 4
   %3 = add nsw i32 %2, 6
-  store i32 %3, i32* %1, align 4
+  store i32 %3, ptr %1, align 4
   %iv1 = add i32 %iv, 1
   %cond = icmp eq i32 %iv1, %size
   br i1 %cond, label %exit, label %loop

diff  --git a/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll
index 2f70b23ec552b..d8dbde455c7ca 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll
@@ -6,7 +6,7 @@
 
 ; Check that the addresses for a scalarized memory access are not extracted
 ; from a vector register.
-define i32 @foo(i32* nocapture %A) {
+define i32 @foo(ptr nocapture %A) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -17,10 +17,10 @@ define i32 @foo(i32* nocapture %A) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[TMP1]], 4
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
-; CHECK-NEXT:    store i32 4, i32* [[TMP3]], align 4
-; CHECK-NEXT:    store i32 4, i32* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT:    store i32 4, ptr [[TMP3]], align 4
+; CHECK-NEXT:    store i32 4, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -40,8 +40,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = shl nsw i64 %indvars.iv, 2
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
-  store i32 4, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %0
+  store i32 4, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 10000
@@ -53,7 +53,7 @@ for.end:
 
 
 ; Check that a load of an address is scalarized.
-define i32 @foo1(i32* nocapture noalias %A, i32** nocapture %PtrPtr) {
+define i32 @foo1(ptr nocapture noalias %A, ptr nocapture %PtrPtr) {
 ; CHECK-LABEL: @foo1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -62,17 +62,16 @@ define i32 @foo1(i32* nocapture noalias %A, i32** nocapture %PtrPtr) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32*, i32** [[PTRPTR:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32*, i32** [[PTRPTR]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP2]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[PTRPTR:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[PTRPTR]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP8]], <2 x i32>* [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -91,11 +90,11 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %ptr = getelementptr inbounds i32*, i32** %PtrPtr, i64 %indvars.iv
-  %el = load i32*, i32** %ptr
-  %v = load i32, i32* %el
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  store i32 %v, i32* %arrayidx, align 4
+  %ptr = getelementptr inbounds ptr, ptr %PtrPtr, i64 %indvars.iv
+  %el = load ptr, ptr %ptr
+  %v = load i32, ptr %el
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  store i32 %v, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 10000

diff  --git a/llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll
index 4b02e81a2c716..c8686d97838b3 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll
@@ -8,20 +8,20 @@
 ; vector compare plus a test under mask instruction. This cost is modelled on
 ; the extractelement of i1.
 
-define void @fun(i32* %arr, i64 %trip.count) {
+define void @fun(ptr %arr, i64 %trip.count) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %indvars.iv
-  %l = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %indvars.iv
+  %l = load i32, ptr %arrayidx, align 4
   %cmp55 = icmp sgt i32 %l, 0
   br i1 %cmp55, label %if.then, label %for.inc
 
 if.then:
   %sub = sub nsw i32 0, %l
-  store i32 %sub, i32* %arrayidx, align 4
+  store i32 %sub, ptr %arrayidx, align 4
   br label %for.inc
 
 for.inc:

diff  --git a/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-1.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-1.ll
index c5bc23de20d00..6de9a175d55ad 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-1.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-1.ll
@@ -7,15 +7,15 @@
 ; Check that a scalarized load does not get a zero cost in a vectorized
 ; loop. It can only be folded into the add operand in the scalar loop.
 
-define i32 @fun(i64* %data, i64 %n, i64 %s, i32* %Src) {
+define i32 @fun(ptr %data, i64 %n, i64 %s, ptr %Src) {
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %acc = phi i32 [ 0, %entry ], [ %acc_next, %for.body ]
-  %gep = getelementptr inbounds i32, i32* %Src, i64 %iv
-  %ld = load i32, i32* %gep
+  %gep = getelementptr inbounds i32, ptr %Src, i64 %iv
+  %ld = load i32, ptr %gep
   %acc_next = add i32 %acc, %ld
   %iv.next = add nuw nsw i64 %iv, 2
   %cmp110.us = icmp slt i64 %iv.next, %n
@@ -24,5 +24,5 @@ for.body:
 for.end:
   ret i32 %acc_next
 
-; CHECK: Found an estimated cost of 4 for VF 4 For instruction:   %ld = load i32, i32* %gep
+; CHECK: Found an estimated cost of 4 for VF 4 For instruction:   %ld = load i32, ptr %gep
 }

diff  --git a/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
index 399bd4470cea6..deb81099ac676 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
@@ -7,16 +7,16 @@
 ; Check that a scalarized load/store does not get a cost for inserts/
 ; extracts, since z13 supports element load/store.
 
-define void @fun(i32* %data, i64 %n) {
+define void @fun(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds i32, i32* %data, i64 %i
-  %tmp1 = load i32, i32* %tmp0, align 4
+  %tmp0 = getelementptr inbounds i32, ptr %data, i64 %i
+  %tmp1 = load i32, ptr %tmp0, align 4
   %tmp2 = add i32 %tmp1, 1
-  store i32 %tmp2, i32* %tmp0, align 4
+  store i32 %tmp2, ptr %tmp0, align 4
   %i.next = add nuw nsw i64 %i, 2
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -24,10 +24,10 @@ for.body:
 for.end:
   ret void
 
-; CHECK: LV: Scalarizing:  %tmp1 = load i32, i32* %tmp0, align 4
-; CHECK: LV: Scalarizing:  store i32 %tmp2, i32* %tmp0, align 4
+; CHECK: LV: Scalarizing:  %tmp1 = load i32, ptr %tmp0, align 4
+; CHECK: LV: Scalarizing:  store i32 %tmp2, ptr %tmp0, align 4
 
-; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   %tmp1 = load i32, i32* %tmp0, align 4
-; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   store i32 %tmp2, i32* %tmp0, align 4
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   %tmp1 = load i32, ptr %tmp0, align 4
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   store i32 %tmp2, ptr %tmp0, align 4
 }
 

diff  --git a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-02.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-02.ll
index ad45fba8100e1..c75899e2224f1 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-02.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-02.ll
@@ -15,28 +15,27 @@
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %ld1 = load i16
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %ld2 = load i16
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %ld3 = load i16
-define void @fun0(i16 *%ptr, i16 *%dst) {
+define void @fun0(ptr %ptr, ptr %dst) {
 entry:
   br label %for.body
 
 for.body:
-  %ivptr = phi i16* [ %ptr.next, %for.body ], [ %ptr, %entry ]
+  %ivptr = phi ptr [ %ptr.next, %for.body ], [ %ptr, %entry ]
   %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %inc = add i64 %iv, 4
-  %ptr0 = getelementptr inbounds i16, i16* %ivptr, i64 0
-  %ld0 = load i16, i16* %ptr0
-  %ptr1 = getelementptr inbounds i16, i16* %ivptr, i64 1
-  %ld1 = load i16, i16* %ptr1
-  %ptr2 = getelementptr inbounds i16, i16* %ivptr, i64 2
-  %ld2 = load i16, i16* %ptr2
-  %ptr3 = getelementptr inbounds i16, i16* %ivptr, i64 3
-  %ld3 = load i16, i16* %ptr3
+  %ld0 = load i16, ptr %ivptr
+  %ptr1 = getelementptr inbounds i16, ptr %ivptr, i64 1
+  %ld1 = load i16, ptr %ptr1
+  %ptr2 = getelementptr inbounds i16, ptr %ivptr, i64 2
+  %ld2 = load i16, ptr %ptr2
+  %ptr3 = getelementptr inbounds i16, ptr %ivptr, i64 3
+  %ld3 = load i16, ptr %ptr3
   %a1 = add i16 %ld0, %ld1
   %a2 = add i16 %a1, %ld2
   %a3 = add i16 %a2, %ld3
-  %dstptr = getelementptr inbounds i16, i16* %dst, i64 %iv
-  store i16 %a3, i16* %dstptr
-  %ptr.next = getelementptr inbounds i16, i16* %ivptr, i64 4
+  %dstptr = getelementptr inbounds i16, ptr %dst, i64 %iv
+  store i16 %a3, ptr %dstptr
+  %ptr.next = getelementptr inbounds i16, ptr %ivptr, i64 4
   %cmp = icmp eq i64 %inc, 100
   br i1 %cmp, label %for.end, label %for.body
 
@@ -50,19 +49,18 @@ for.end:
 ;
 ; CHECK: LV: Checking a loop in 'fun1'
 ; CHECK: LV: Found an estimated cost of 5 for VF 16 For instruction:   %ld0 = load i8
-define void @fun1(i8 *%ptr, i8 *%dst) {
+define void @fun1(ptr %ptr, ptr %dst) {
 entry:
   br label %for.body
 
 for.body:
-  %ivptr = phi i8* [ %ptr.next, %for.body ], [ %ptr, %entry ]
+  %ivptr = phi ptr [ %ptr.next, %for.body ], [ %ptr, %entry ]
   %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %inc = add i64 %iv, 4
-  %ptr0 = getelementptr inbounds i8, i8* %ivptr, i64 0
-  %ld0 = load i8, i8* %ptr0
-  %dstptr = getelementptr inbounds i8, i8* %dst, i64 %iv
-  store i8 %ld0, i8* %dstptr
-  %ptr.next = getelementptr inbounds i8, i8* %ivptr, i64 3
+  %ld0 = load i8, ptr %ivptr
+  %dstptr = getelementptr inbounds i8, ptr %dst, i64 %iv
+  store i8 %ld0, ptr %dstptr
+  %ptr.next = getelementptr inbounds i8, ptr %ivptr, i64 3
   %cmp = icmp eq i64 %inc, 100
   br i1 %cmp, label %for.end, label %for.body
 
@@ -79,28 +77,27 @@ for.end:
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %ld1 = load i8
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %ld2 = load i8
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %ld3 = load i8
-define void @fun2(i8 *%ptr, i8 *%dst) {
+define void @fun2(ptr %ptr, ptr %dst) {
 entry:
   br label %for.body
 
 for.body:
-  %ivptr = phi i8* [ %ptr.next, %for.body ], [ %ptr, %entry ]
+  %ivptr = phi ptr [ %ptr.next, %for.body ], [ %ptr, %entry ]
   %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %inc = add i64 %iv, 4
-  %ptr0 = getelementptr inbounds i8, i8* %ivptr, i64 0
-  %ld0 = load i8, i8* %ptr0
-  %ptr1 = getelementptr inbounds i8, i8* %ivptr, i64 1
-  %ld1 = load i8, i8* %ptr1
-  %ptr2 = getelementptr inbounds i8, i8* %ivptr, i64 2
-  %ld2 = load i8, i8* %ptr2
-  %ptr3 = getelementptr inbounds i8, i8* %ivptr, i64 3
-  %ld3 = load i8, i8* %ptr3
+  %ld0 = load i8, ptr %ivptr
+  %ptr1 = getelementptr inbounds i8, ptr %ivptr, i64 1
+  %ld1 = load i8, ptr %ptr1
+  %ptr2 = getelementptr inbounds i8, ptr %ivptr, i64 2
+  %ld2 = load i8, ptr %ptr2
+  %ptr3 = getelementptr inbounds i8, ptr %ivptr, i64 3
+  %ld3 = load i8, ptr %ptr3
   %a1 = add i8 %ld0, %ld1
   %a2 = add i8 %a1, %ld2
   %a3 = add i8 %a2, %ld3
-  %dstptr = getelementptr inbounds i8, i8* %dst, i64 %iv
-  store i8 %a3, i8* %dstptr
-  %ptr.next = getelementptr inbounds i8, i8* %ivptr, i64 32
+  %dstptr = getelementptr inbounds i8, ptr %dst, i64 %iv
+  store i8 %a3, ptr %dstptr
+  %ptr.next = getelementptr inbounds i8, ptr %ivptr, i64 32
   %cmp = icmp eq i64 %inc, 100
   br i1 %cmp, label %for.end, label %for.body
 
@@ -119,28 +116,27 @@ for.end:
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %ld1 = load i8
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %ld2 = load i8
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %ld3 = load i8
-define void @fun3(i8 *%ptr, i8 *%dst) {
+define void @fun3(ptr %ptr, ptr %dst) {
 entry:
   br label %for.body
 
 for.body:
-  %ivptr = phi i8* [ %ptr.next, %for.body ], [ %ptr, %entry ]
+  %ivptr = phi ptr [ %ptr.next, %for.body ], [ %ptr, %entry ]
   %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %inc = add i64 %iv, 4
-  %ptr0 = getelementptr inbounds i8, i8* %ivptr, i64 0
-  %ld0 = load i8, i8* %ptr0
-  %ptr1 = getelementptr inbounds i8, i8* %ivptr, i64 1
-  %ld1 = load i8, i8* %ptr1
-  %ptr2 = getelementptr inbounds i8, i8* %ivptr, i64 2
-  %ld2 = load i8, i8* %ptr2
-  %ptr3 = getelementptr inbounds i8, i8* %ivptr, i64 3
-  %ld3 = load i8, i8* %ptr3
+  %ld0 = load i8, ptr %ivptr
+  %ptr1 = getelementptr inbounds i8, ptr %ivptr, i64 1
+  %ld1 = load i8, ptr %ptr1
+  %ptr2 = getelementptr inbounds i8, ptr %ivptr, i64 2
+  %ld2 = load i8, ptr %ptr2
+  %ptr3 = getelementptr inbounds i8, ptr %ivptr, i64 3
+  %ld3 = load i8, ptr %ptr3
   %a1 = add i8 %ld0, %ld1
   %a2 = add i8 %a1, %ld2
   %a3 = add i8 %a2, %ld3
-  %dstptr = getelementptr inbounds i8, i8* %dst, i64 %iv
-  store i8 %a3, i8* %dstptr
-  %ptr.next = getelementptr inbounds i8, i8* %ivptr, i64 30
+  %dstptr = getelementptr inbounds i8, ptr %dst, i64 %iv
+  store i8 %a3, ptr %dstptr
+  %ptr.next = getelementptr inbounds i8, ptr %ivptr, i64 30
   %cmp = icmp eq i64 %inc, 100
   br i1 %cmp, label %for.end, label %for.body
 

diff  --git a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll
index e629110a95a49..ee81af23c933e 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll
@@ -9,16 +9,16 @@
 
 ; Simple case where just the load is interleaved, because the store group
 ; would have gaps.
-define void @fun0(i32* %data, i64 %n) {
+define void @fun0(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds i32, i32* %data, i64 %i
-  %tmp1 = load i32, i32* %tmp0, align 4
+  %tmp0 = getelementptr inbounds i32, ptr %data, i64 %i
+  %tmp1 = load i32, ptr %tmp0, align 4
   %tmp2 = add i32 %tmp1, 1
-  store i32 %tmp2, i32* %tmp0, align 4
+  store i32 %tmp2, ptr %tmp0, align 4
   %i.next = add nuw nsw i64 %i, 2
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -26,25 +26,25 @@ for.body:
 for.end:
   ret void
 
-; CHECK: LV: Creating an interleave group with:  %tmp1 = load i32, i32* %tmp0, align 4
-; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction:   %tmp1 = load i32, i32* %tmp0, align 4
+; CHECK: LV: Creating an interleave group with:  %tmp1 = load i32, ptr %tmp0, align 4
+; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction:   %tmp1 = load i32, ptr %tmp0, align 4
 ;        (vl; vl; vperm)
 }
 
 ; Interleaving of both load and stores.
-define void @fun1(i32* %data, i64 %n) {
+define void @fun1(ptr %data, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %tmp0 = getelementptr inbounds i32, i32* %data, i64 %i
-  %tmp1 = load i32, i32* %tmp0, align 4
+  %tmp0 = getelementptr inbounds i32, ptr %data, i64 %i
+  %tmp1 = load i32, ptr %tmp0, align 4
   %i_1  = add i64 %i, 1
-  %tmp2 = getelementptr inbounds i32, i32* %data, i64 %i_1
-  %tmp3 = load i32, i32* %tmp2, align 4
-  store i32 %tmp1, i32* %tmp2, align 4
-  store i32 %tmp3, i32* %tmp0, align 4
+  %tmp2 = getelementptr inbounds i32, ptr %data, i64 %i_1
+  %tmp3 = load i32, ptr %tmp2, align 4
+  store i32 %tmp1, ptr %tmp2, align 4
+  store i32 %tmp3, ptr %tmp0, align 4
   %i.next = add nuw nsw i64 %i, 2
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -52,19 +52,19 @@ for.body:
 for.end:
   ret void
 
-; CHECK: LV: Creating an interleave group with:  store i32 %tmp3, i32* %tmp0, align 4
-; CHECK: LV: Inserted:  store i32 %tmp1, i32* %tmp2, align 4
-; CHECK:     into the interleave group with  store i32 %tmp3, i32* %tmp0, align 4
-; CHECK: LV: Creating an interleave group with:  %tmp3 = load i32, i32* %tmp2, align 4
-; CHECK: LV: Inserted:  %tmp1 = load i32, i32* %tmp0, align 4
-; CHECK:     into the interleave group with  %tmp3 = load i32, i32* %tmp2, align 4
+; CHECK: LV: Creating an interleave group with:  store i32 %tmp3, ptr %tmp0, align 4
+; CHECK: LV: Inserted:  store i32 %tmp1, ptr %tmp2, align 4
+; CHECK:     into the interleave group with  store i32 %tmp3, ptr %tmp0, align 4
+; CHECK: LV: Creating an interleave group with:  %tmp3 = load i32, ptr %tmp2, align 4
+; CHECK: LV: Inserted:  %tmp1 = load i32, ptr %tmp0, align 4
+; CHECK:     into the interleave group with  %tmp3 = load i32, ptr %tmp2, align 4
 
-; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   %tmp1 = load i32, i32* %tmp0, align 4
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %tmp3 = load i32, i32* %tmp2, align 4
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   %tmp1 = load i32, ptr %tmp0, align 4
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %tmp3 = load i32, ptr %tmp2, align 4
 ;            (vl; vl; vperm, vpkg)
 
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   store i32 %tmp1, i32* %tmp2, align 4
-; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   store i32 %tmp3, i32* %tmp0, align 4
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   store i32 %tmp1, ptr %tmp2, align 4
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   store i32 %tmp3, ptr %tmp0, align 4
 ;            (vmrlf; vmrhf; vst; vst)
 }
 

diff  --git a/llvm/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll
index 63f42a8156e62..d469178fec0e7 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -S -passes=loop-vectorize -mtriple=s390x-linux-gnu -tiny-trip-count-interleave-threshold=4 -vectorizer-min-trip-count=8 < %s | FileCheck %s
 
-define i32 @main(i32 %arg, i8** nocapture readnone %arg1) #0 {
+define i32 @main(i32 %arg, ptr nocapture readnone %arg1) #0 {
 ;CHECK: vector.body:
 entry:
   %0 = alloca i8, align 1
@@ -8,7 +8,7 @@ entry:
 
 loop:
   %storemerge.i.i = phi i8 [ 0, %entry ], [ %tmp12.i.i, %loop ]
-  store i8 %storemerge.i.i, i8* %0, align 2
+  store i8 %storemerge.i.i, ptr %0, align 2
   %tmp8.i.i = icmp ult i8 %storemerge.i.i, 8
   %tmp12.i.i = add nuw nsw i8 %storemerge.i.i, 1
   br i1 %tmp8.i.i, label %loop, label %ret

diff  --git a/llvm/test/Transforms/LoopVectorize/VE/disable_lv.ll b/llvm/test/Transforms/LoopVectorize/VE/disable_lv.ll
index 5c094d1967691..5dbc7722fe0a6 100644
--- a/llvm/test/Transforms/LoopVectorize/VE/disable_lv.ll
+++ b/llvm/test/Transforms/LoopVectorize/VE/disable_lv.ll
@@ -8,7 +8,7 @@
 ; VE-NOT: llvm.loop.isvectorized
 ; AVX: llvm.loop.isvectorized
 
-define dso_local void @foo(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32 signext %n) local_unnamed_addr {
+define dso_local void @foo(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, i32 signext %n) local_unnamed_addr {
 entry:
   %cmp = icmp sgt i32 %n, 0
   br i1 %cmp, label %omp.inner.for.body.preheader, label %simd.if.end
@@ -19,11 +19,11 @@ omp.inner.for.body.preheader:                     ; preds = %entry
 
 omp.inner.for.body:                               ; preds = %omp.inner.for.body.preheader, %omp.inner.for.body
   %indvars.iv = phi i64 [ 0, %omp.inner.for.body.preheader ], [ %indvars.iv.next, %omp.inner.for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !6
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4, !llvm.access.group !6
   %mul6 = mul nsw i32 %0, 3
-  %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  store i32 %mul6, i32* %arrayidx8, align 4, !llvm.access.group !6
+  %arrayidx8 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  store i32 %mul6, ptr %arrayidx8, align 4, !llvm.access.group !6
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %simd.if.end, label %omp.inner.for.body, !llvm.loop !7

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll
index b79b7a569b274..dad1853529863 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll
@@ -23,8 +23,8 @@ for.body:                                         ; preds = %for.body, %entry
 ; CHECK: LV: We can vectorize this loop!
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds [255 x i32], [255 x i32]* @a, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [255 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add nsw i32 %0, %red.05
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 255

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/avx1.ll b/llvm/test/Transforms/LoopVectorize/X86/avx1.ll
index 5e56b8a5c6a15..9e205863b8367 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/avx1.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/avx1.ll
@@ -7,16 +7,16 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; CHECK-LABEL: @read_mod_write_single_ptr(
 ; CHECK: load <8 x float>
 ; CHECK: ret i32
-define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp {
+define i32 @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %3 = load float, float* %2, align 4
+  %2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %3 = load float, ptr %2, align 4
   %4 = fmul float %3, 3.000000e+00
-  store float %4, float* %2, align 4
+  store float %4, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -31,16 +31,16 @@ define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwta
 ; SLOWMEM32: load <2 x i64>
 ; FASTMEM32: load <4 x i64>
 ; CHECK: ret i32
-define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
+define i32 @read_mod_i64(ptr nocapture %a, i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
-  %3 = load i64, i64* %2, align 4
+  %2 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  %3 = load i64, ptr %2, align 4
   %4 = add i64 %3, 3
-  store i64 %4, i64* %2, align 4
+  store i64 %4, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/avx512.ll b/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
index e8668e7bd42b6..44aae477bf71c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
@@ -19,7 +19,7 @@ target triple = "x86_64-apple-macosx10.9.0"
 ; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
 ; CHECK-PREFER-AVX256-NOT: %zmm
 
-define void @f(i32* %a, i32 %n) {
+define void @f(ptr %a, i32 %n) {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body.preheader, label %for.end
@@ -29,8 +29,8 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 %n, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %n, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -54,7 +54,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
 ; CHECK-PREFER-AVX256-NOT: %zmm
 
-define void @g(i32* %a, i32 %n) "prefer-vector-width"="256" {
+define void @g(ptr %a, i32 %n) "prefer-vector-width"="256" {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body.preheader, label %for.end
@@ -64,8 +64,8 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 %n, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %n, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -93,7 +93,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; CHECK-PREFER-AVX256: epilog
 ; CHECK-PREFER-AVX256: %ymm
 
-define void @h(i32* %a, i32 %n) "prefer-vector-width"="512" {
+define void @h(ptr %a, i32 %n) "prefer-vector-width"="512" {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body.preheader, label %for.end
@@ -103,8 +103,8 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 %n, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %n, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll
index 9ea4e6acedb6d..a8a60dd46a9a1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll
@@ -57,9 +57,9 @@ define void @test_01() {
   %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
   %9 = add i32 %8, 2
   %10 = zext i32 %9 to i64
-  %11 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %10
+  %11 = getelementptr inbounds i32, ptr addrspace(1) undef, i64 %10
   %12 = ashr i32 undef, %4
-  store i32 %12, i32 addrspace(1)* %11, align 4
+  store i32 %12, ptr addrspace(1) %11, align 4
   %13 = add i32 %7, 1
   %14 = icmp sgt i32 %13, 61
   br i1 %14, label %._crit_edge.loopexit, label %6
@@ -96,9 +96,9 @@ define void @test_02() {
   %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
   %9 = add i32 %8, 2
   %10 = zext i32 %9 to i64
-  %11 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %10
+  %11 = getelementptr inbounds i32, ptr addrspace(1) undef, i64 %10
   %12 = ashr i32 undef, %4
-  store i32 %12, i32 addrspace(1)* %11, align 4
+  store i32 %12, ptr addrspace(1) %11, align 4
   %13 = add i32 %7, 1
   %14 = icmp sgt i32 %13, 610
   br i1 %14, label %._crit_edge.loopexit, label %6

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
index 8c351d5ae0c36..2d3260614f89e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
@@ -15,11 +15,11 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %shl = ashr i32 %0, 3
-  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  store i32 %shl, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  store i32 %shl, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 0dd28c7e1a830..64f3b1c5235de 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -17,16 +17,16 @@ define void @cost_model_1() nounwind uwtable noinline ssp {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = sext i32 [[TMP1]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[IDXPROM1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @d, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2048 x i32], ptr @d, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[IDXPROM5:%.*]] = sext i32 [[TMP3]] to i64
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[IDXPROM5]]
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[IDXPROM5]]
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[ARRAYIDX6]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256
@@ -40,16 +40,16 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = shl nsw i64 %indvars.iv, 1
-  %arrayidx = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %0
-  %1 = load i32, i32* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %0
+  %1 = load i32, ptr %arrayidx, align 8
   %idxprom1 = sext i32 %1 to i64
-  %arrayidx2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %idxprom1
-  %2 = load i32, i32* %arrayidx2, align 4
-  %arrayidx4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @d, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %arrayidx4, align 4
+  %arrayidx2 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %idxprom1
+  %2 = load i32, ptr %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds [2048 x i32], ptr @d, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %arrayidx4, align 4
   %idxprom5 = sext i32 %3 to i64
-  %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %idxprom5
-  store i32 %2, i32* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %idxprom5
+  store i32 %2, ptr %arrayidx6, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 256
@@ -64,7 +64,7 @@ for.end:                                          ; preds = %for.body
 ; then we don't need to extract the pointers out of vector of pointers,
 ; and the vectorization becomes profitable.
 
-define float @PR27826(float* nocapture readonly %a, float* nocapture readonly %b, i32 %n) {
+define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
 ; CHECK-LABEL: @PR27826(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -104,98 +104,98 @@ define float @PR27826(float* nocapture readonly %a, float* nocapture readonly %b
 ; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 416
 ; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 448
 ; CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 480
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP14]]
-; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
-; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP18]]
-; CHECK-NEXT:    [[TMP35:%.*]] = load float, float* [[TMP19]], align 4
-; CHECK-NEXT:    [[TMP36:%.*]] = load float, float* [[TMP20]], align 4
-; CHECK-NEXT:    [[TMP37:%.*]] = load float, float* [[TMP21]], align 4
-; CHECK-NEXT:    [[TMP38:%.*]] = load float, float* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP18]]
+; CHECK-NEXT:    [[TMP35:%.*]] = load float, ptr [[TMP19]], align 4
+; CHECK-NEXT:    [[TMP36:%.*]] = load float, ptr [[TMP20]], align 4
+; CHECK-NEXT:    [[TMP37:%.*]] = load float, ptr [[TMP21]], align 4
+; CHECK-NEXT:    [[TMP38:%.*]] = load float, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x float> poison, float [[TMP35]], i32 0
 ; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP36]], i32 1
 ; CHECK-NEXT:    [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP37]], i32 2
 ; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP38]], i32 3
-; CHECK-NEXT:    [[TMP43:%.*]] = load float, float* [[TMP23]], align 4
-; CHECK-NEXT:    [[TMP44:%.*]] = load float, float* [[TMP24]], align 4
-; CHECK-NEXT:    [[TMP45:%.*]] = load float, float* [[TMP25]], align 4
-; CHECK-NEXT:    [[TMP46:%.*]] = load float, float* [[TMP26]], align 4
+; CHECK-NEXT:    [[TMP43:%.*]] = load float, ptr [[TMP23]], align 4
+; CHECK-NEXT:    [[TMP44:%.*]] = load float, ptr [[TMP24]], align 4
+; CHECK-NEXT:    [[TMP45:%.*]] = load float, ptr [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP46:%.*]] = load float, ptr [[TMP26]], align 4
 ; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x float> poison, float [[TMP43]], i32 0
 ; CHECK-NEXT:    [[TMP48:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP44]], i32 1
 ; CHECK-NEXT:    [[TMP49:%.*]] = insertelement <4 x float> [[TMP48]], float [[TMP45]], i32 2
 ; CHECK-NEXT:    [[TMP50:%.*]] = insertelement <4 x float> [[TMP49]], float [[TMP46]], i32 3
-; CHECK-NEXT:    [[TMP51:%.*]] = load float, float* [[TMP27]], align 4
-; CHECK-NEXT:    [[TMP52:%.*]] = load float, float* [[TMP28]], align 4
-; CHECK-NEXT:    [[TMP53:%.*]] = load float, float* [[TMP29]], align 4
-; CHECK-NEXT:    [[TMP54:%.*]] = load float, float* [[TMP30]], align 4
+; CHECK-NEXT:    [[TMP51:%.*]] = load float, ptr [[TMP27]], align 4
+; CHECK-NEXT:    [[TMP52:%.*]] = load float, ptr [[TMP28]], align 4
+; CHECK-NEXT:    [[TMP53:%.*]] = load float, ptr [[TMP29]], align 4
+; CHECK-NEXT:    [[TMP54:%.*]] = load float, ptr [[TMP30]], align 4
 ; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x float> poison, float [[TMP51]], i32 0
 ; CHECK-NEXT:    [[TMP56:%.*]] = insertelement <4 x float> [[TMP55]], float [[TMP52]], i32 1
 ; CHECK-NEXT:    [[TMP57:%.*]] = insertelement <4 x float> [[TMP56]], float [[TMP53]], i32 2
 ; CHECK-NEXT:    [[TMP58:%.*]] = insertelement <4 x float> [[TMP57]], float [[TMP54]], i32 3
-; CHECK-NEXT:    [[TMP59:%.*]] = load float, float* [[TMP31]], align 4
-; CHECK-NEXT:    [[TMP60:%.*]] = load float, float* [[TMP32]], align 4
-; CHECK-NEXT:    [[TMP61:%.*]] = load float, float* [[TMP33]], align 4
-; CHECK-NEXT:    [[TMP62:%.*]] = load float, float* [[TMP34]], align 4
+; CHECK-NEXT:    [[TMP59:%.*]] = load float, ptr [[TMP31]], align 4
+; CHECK-NEXT:    [[TMP60:%.*]] = load float, ptr [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP61:%.*]] = load float, ptr [[TMP33]], align 4
+; CHECK-NEXT:    [[TMP62:%.*]] = load float, ptr [[TMP34]], align 4
 ; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x float> poison, float [[TMP59]], i32 0
 ; CHECK-NEXT:    [[TMP64:%.*]] = insertelement <4 x float> [[TMP63]], float [[TMP60]], i32 1
 ; CHECK-NEXT:    [[TMP65:%.*]] = insertelement <4 x float> [[TMP64]], float [[TMP61]], i32 2
 ; CHECK-NEXT:    [[TMP66:%.*]] = insertelement <4 x float> [[TMP65]], float [[TMP62]], i32 3
-; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP8]]
-; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP75:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP76:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP77:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP14]]
-; CHECK-NEXT:    [[TMP79:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP80:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP16]]
-; CHECK-NEXT:    [[TMP81:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP82:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP18]]
-; CHECK-NEXT:    [[TMP83:%.*]] = load float, float* [[TMP67]], align 4
-; CHECK-NEXT:    [[TMP84:%.*]] = load float, float* [[TMP68]], align 4
-; CHECK-NEXT:    [[TMP85:%.*]] = load float, float* [[TMP69]], align 4
-; CHECK-NEXT:    [[TMP86:%.*]] = load float, float* [[TMP70]], align 4
+; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP75:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP76:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP77:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP79:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP80:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP81:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP82:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP18]]
+; CHECK-NEXT:    [[TMP83:%.*]] = load float, ptr [[TMP67]], align 4
+; CHECK-NEXT:    [[TMP84:%.*]] = load float, ptr [[TMP68]], align 4
+; CHECK-NEXT:    [[TMP85:%.*]] = load float, ptr [[TMP69]], align 4
+; CHECK-NEXT:    [[TMP86:%.*]] = load float, ptr [[TMP70]], align 4
 ; CHECK-NEXT:    [[TMP87:%.*]] = insertelement <4 x float> poison, float [[TMP83]], i32 0
 ; CHECK-NEXT:    [[TMP88:%.*]] = insertelement <4 x float> [[TMP87]], float [[TMP84]], i32 1
 ; CHECK-NEXT:    [[TMP89:%.*]] = insertelement <4 x float> [[TMP88]], float [[TMP85]], i32 2
 ; CHECK-NEXT:    [[TMP90:%.*]] = insertelement <4 x float> [[TMP89]], float [[TMP86]], i32 3
-; CHECK-NEXT:    [[TMP91:%.*]] = load float, float* [[TMP71]], align 4
-; CHECK-NEXT:    [[TMP92:%.*]] = load float, float* [[TMP72]], align 4
-; CHECK-NEXT:    [[TMP93:%.*]] = load float, float* [[TMP73]], align 4
-; CHECK-NEXT:    [[TMP94:%.*]] = load float, float* [[TMP74]], align 4
+; CHECK-NEXT:    [[TMP91:%.*]] = load float, ptr [[TMP71]], align 4
+; CHECK-NEXT:    [[TMP92:%.*]] = load float, ptr [[TMP72]], align 4
+; CHECK-NEXT:    [[TMP93:%.*]] = load float, ptr [[TMP73]], align 4
+; CHECK-NEXT:    [[TMP94:%.*]] = load float, ptr [[TMP74]], align 4
 ; CHECK-NEXT:    [[TMP95:%.*]] = insertelement <4 x float> poison, float [[TMP91]], i32 0
 ; CHECK-NEXT:    [[TMP96:%.*]] = insertelement <4 x float> [[TMP95]], float [[TMP92]], i32 1
 ; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <4 x float> [[TMP96]], float [[TMP93]], i32 2
 ; CHECK-NEXT:    [[TMP98:%.*]] = insertelement <4 x float> [[TMP97]], float [[TMP94]], i32 3
-; CHECK-NEXT:    [[TMP99:%.*]] = load float, float* [[TMP75]], align 4
-; CHECK-NEXT:    [[TMP100:%.*]] = load float, float* [[TMP76]], align 4
-; CHECK-NEXT:    [[TMP101:%.*]] = load float, float* [[TMP77]], align 4
-; CHECK-NEXT:    [[TMP102:%.*]] = load float, float* [[TMP78]], align 4
+; CHECK-NEXT:    [[TMP99:%.*]] = load float, ptr [[TMP75]], align 4
+; CHECK-NEXT:    [[TMP100:%.*]] = load float, ptr [[TMP76]], align 4
+; CHECK-NEXT:    [[TMP101:%.*]] = load float, ptr [[TMP77]], align 4
+; CHECK-NEXT:    [[TMP102:%.*]] = load float, ptr [[TMP78]], align 4
 ; CHECK-NEXT:    [[TMP103:%.*]] = insertelement <4 x float> poison, float [[TMP99]], i32 0
 ; CHECK-NEXT:    [[TMP104:%.*]] = insertelement <4 x float> [[TMP103]], float [[TMP100]], i32 1
 ; CHECK-NEXT:    [[TMP105:%.*]] = insertelement <4 x float> [[TMP104]], float [[TMP101]], i32 2
 ; CHECK-NEXT:    [[TMP106:%.*]] = insertelement <4 x float> [[TMP105]], float [[TMP102]], i32 3
-; CHECK-NEXT:    [[TMP107:%.*]] = load float, float* [[TMP79]], align 4
-; CHECK-NEXT:    [[TMP108:%.*]] = load float, float* [[TMP80]], align 4
-; CHECK-NEXT:    [[TMP109:%.*]] = load float, float* [[TMP81]], align 4
-; CHECK-NEXT:    [[TMP110:%.*]] = load float, float* [[TMP82]], align 4
+; CHECK-NEXT:    [[TMP107:%.*]] = load float, ptr [[TMP79]], align 4
+; CHECK-NEXT:    [[TMP108:%.*]] = load float, ptr [[TMP80]], align 4
+; CHECK-NEXT:    [[TMP109:%.*]] = load float, ptr [[TMP81]], align 4
+; CHECK-NEXT:    [[TMP110:%.*]] = load float, ptr [[TMP82]], align 4
 ; CHECK-NEXT:    [[TMP111:%.*]] = insertelement <4 x float> poison, float [[TMP107]], i32 0
 ; CHECK-NEXT:    [[TMP112:%.*]] = insertelement <4 x float> [[TMP111]], float [[TMP108]], i32 1
 ; CHECK-NEXT:    [[TMP113:%.*]] = insertelement <4 x float> [[TMP112]], float [[TMP109]], i32 2
@@ -225,10 +225,10 @@ define float @PR27826(float* nocapture readonly %a, float* nocapture readonly %b
 ; CHECK:       for:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR]] ]
 ; CHECK-NEXT:    [[S_02:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD4:%.*]], [[FOR]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[T1:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[T2:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[T1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[T2:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[T1]], [[S_02]]
 ; CHECK-NEXT:    [[ADD4]] = fadd fast float [[ADD]], [[T2]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 32
@@ -252,10 +252,10 @@ preheader:
 for:
   %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ]
   %s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %t1 = load float, float* %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %t2 = load float, float* %arrayidx3, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %t1 = load float, ptr %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %t2 = load float, ptr %arrayidx3, align 4
   %add = fadd fast float %t1, %s.02
   %add4 = fadd fast float %add, %t2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index e925e636bcae5..43c5cbc9e6a4c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -6,7 +6,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 ; This test checks auto-vectorization with FP induction variable.
 ; FMF is required on the IR instructions.
 
-;void fp_iv_loop1(float * __restrict__ A, int N) {
+;void fp_iv_loop1(ptr __restrict__ A, int N) {
 ;  float x = 1.0;
 ;  for (int i=0; i < N; ++i) {
 ;    A[i] = x;
@@ -14,7 +14,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 ;  }
 ;}
 
-define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
+define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-LABEL: @fp_iv_loop1(
 ; AUTO_VEC-NEXT:  entry:
 ; AUTO_VEC-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -44,69 +44,53 @@ define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01>
-; AUTO_VEC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; AUTO_VEC-NEXT:    [[TMP6:%.*]] = bitcast float* [[TMP5]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[VEC_IND]], <8 x float>* [[TMP6]], align 4
-; AUTO_VEC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 8
-; AUTO_VEC-NEXT:    [[TMP8:%.*]] = bitcast float* [[TMP7]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD]], <8 x float>* [[TMP8]], align 4
-; AUTO_VEC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 16
-; AUTO_VEC-NEXT:    [[TMP10:%.*]] = bitcast float* [[TMP9]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2]], <8 x float>* [[TMP10]], align 4
-; AUTO_VEC-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 24
-; AUTO_VEC-NEXT:    [[TMP12:%.*]] = bitcast float* [[TMP11]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3]], <8 x float>* [[TMP12]], align 4
+; AUTO_VEC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; AUTO_VEC-NEXT:    store <8 x float> [[VEC_IND]], ptr [[TMP5]], align 4
+; AUTO_VEC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 8
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD]], ptr [[TMP7]], align 4
+; AUTO_VEC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 16
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2]], ptr [[TMP9]], align 4
+; AUTO_VEC-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 24
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3]], ptr [[TMP11]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 32
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 2.000000e+01, float 2.000000e+01, float 2.000000e+01, float 2.000000e+01, float 2.000000e+01, float 2.000000e+01, float 2.000000e+01, float 2.000000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 2.400000e+01, float 2.400000e+01, float 2.400000e+01, float 2.400000e+01, float 2.400000e+01, float 2.400000e+01, float 2.400000e+01, float 2.400000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 2.800000e+01, float 2.800000e+01, float 2.800000e+01, float 2.800000e+01, float 2.800000e+01, float 2.800000e+01, float 2.800000e+01, float 2.800000e+01>
-; AUTO_VEC-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT]]
-; AUTO_VEC-NEXT:    [[TMP14:%.*]] = bitcast float* [[TMP13]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[VEC_IND_NEXT]], <8 x float>* [[TMP14]], align 4
-; AUTO_VEC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 8
-; AUTO_VEC-NEXT:    [[TMP16:%.*]] = bitcast float* [[TMP15]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD_1]], <8 x float>* [[TMP16]], align 4
-; AUTO_VEC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 16
-; AUTO_VEC-NEXT:    [[TMP18:%.*]] = bitcast float* [[TMP17]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2_1]], <8 x float>* [[TMP18]], align 4
-; AUTO_VEC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 24
-; AUTO_VEC-NEXT:    [[TMP20:%.*]] = bitcast float* [[TMP19]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3_1]], <8 x float>* [[TMP20]], align 4
+; AUTO_VEC-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX_NEXT]]
+; AUTO_VEC-NEXT:    store <8 x float> [[VEC_IND_NEXT]], ptr [[TMP13]], align 4
+; AUTO_VEC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 8
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD_1]], ptr [[TMP15]], align 4
+; AUTO_VEC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 16
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2_1]], ptr [[TMP17]], align 4
+; AUTO_VEC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 24
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3_1]], ptr [[TMP19]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 64
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 3.200000e+01, float 3.200000e+01, float 3.200000e+01, float 3.200000e+01, float 3.200000e+01, float 3.200000e+01, float 3.200000e+01, float 3.200000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 3.600000e+01, float 3.600000e+01, float 3.600000e+01, float 3.600000e+01, float 3.600000e+01, float 3.600000e+01, float 3.600000e+01, float 3.600000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 4.000000e+01, float 4.000000e+01, float 4.000000e+01, float 4.000000e+01, float 4.000000e+01, float 4.000000e+01, float 4.000000e+01, float 4.000000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 4.400000e+01, float 4.400000e+01, float 4.400000e+01, float 4.400000e+01, float 4.400000e+01, float 4.400000e+01, float 4.400000e+01, float 4.400000e+01>
-; AUTO_VEC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT_1]]
-; AUTO_VEC-NEXT:    [[TMP22:%.*]] = bitcast float* [[TMP21]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[VEC_IND_NEXT_1]], <8 x float>* [[TMP22]], align 4
-; AUTO_VEC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 8
-; AUTO_VEC-NEXT:    [[TMP24:%.*]] = bitcast float* [[TMP23]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD_2]], <8 x float>* [[TMP24]], align 4
-; AUTO_VEC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 16
-; AUTO_VEC-NEXT:    [[TMP26:%.*]] = bitcast float* [[TMP25]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2_2]], <8 x float>* [[TMP26]], align 4
-; AUTO_VEC-NEXT:    [[TMP27:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 24
-; AUTO_VEC-NEXT:    [[TMP28:%.*]] = bitcast float* [[TMP27]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3_2]], <8 x float>* [[TMP28]], align 4
+; AUTO_VEC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX_NEXT_1]]
+; AUTO_VEC-NEXT:    store <8 x float> [[VEC_IND_NEXT_1]], ptr [[TMP21]], align 4
+; AUTO_VEC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 8
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD_2]], ptr [[TMP23]], align 4
+; AUTO_VEC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 16
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2_2]], ptr [[TMP25]], align 4
+; AUTO_VEC-NEXT:    [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 24
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3_2]], ptr [[TMP27]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 96
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 4.800000e+01, float 4.800000e+01, float 4.800000e+01, float 4.800000e+01, float 4.800000e+01, float 4.800000e+01, float 4.800000e+01, float 4.800000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD_3:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 5.200000e+01, float 5.200000e+01, float 5.200000e+01, float 5.200000e+01, float 5.200000e+01, float 5.200000e+01, float 5.200000e+01, float 5.200000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_3:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 5.600000e+01, float 5.600000e+01, float 5.600000e+01, float 5.600000e+01, float 5.600000e+01, float 5.600000e+01, float 5.600000e+01, float 5.600000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_3:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 6.000000e+01, float 6.000000e+01, float 6.000000e+01, float 6.000000e+01, float 6.000000e+01, float 6.000000e+01, float 6.000000e+01, float 6.000000e+01>
-; AUTO_VEC-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT_2]]
-; AUTO_VEC-NEXT:    [[TMP30:%.*]] = bitcast float* [[TMP29]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[VEC_IND_NEXT_2]], <8 x float>* [[TMP30]], align 4
-; AUTO_VEC-NEXT:    [[TMP31:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 8
-; AUTO_VEC-NEXT:    [[TMP32:%.*]] = bitcast float* [[TMP31]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD_3]], <8 x float>* [[TMP32]], align 4
-; AUTO_VEC-NEXT:    [[TMP33:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 16
-; AUTO_VEC-NEXT:    [[TMP34:%.*]] = bitcast float* [[TMP33]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2_3]], <8 x float>* [[TMP34]], align 4
-; AUTO_VEC-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 24
-; AUTO_VEC-NEXT:    [[TMP36:%.*]] = bitcast float* [[TMP35]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3_3]], <8 x float>* [[TMP36]], align 4
+; AUTO_VEC-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX_NEXT_2]]
+; AUTO_VEC-NEXT:    store <8 x float> [[VEC_IND_NEXT_2]], ptr [[TMP29]], align 4
+; AUTO_VEC-NEXT:    [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 8
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD_3]], ptr [[TMP31]], align 4
+; AUTO_VEC-NEXT:    [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 16
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2_3]], ptr [[TMP33]], align 4
+; AUTO_VEC-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 24
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3_3]], ptr [[TMP35]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_3]] = add nuw i64 [[INDEX]], 128
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_3]] = fadd fast <8 x float> [[VEC_IND]], <float 6.400000e+01, float 6.400000e+01, float 6.400000e+01, float 6.400000e+01, float 6.400000e+01, float 6.400000e+01, float 6.400000e+01, float 6.400000e+01>
 ; AUTO_VEC-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
@@ -124,18 +108,14 @@ define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-NEXT:    [[STEP_ADD_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], <float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01>
-; AUTO_VEC-NEXT:    [[TMP37:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_EPIL]]
-; AUTO_VEC-NEXT:    [[TMP38:%.*]] = bitcast float* [[TMP37]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[VEC_IND_EPIL]], <8 x float>* [[TMP38]], align 4
-; AUTO_VEC-NEXT:    [[TMP39:%.*]] = getelementptr inbounds float, float* [[TMP37]], i64 8
-; AUTO_VEC-NEXT:    [[TMP40:%.*]] = bitcast float* [[TMP39]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD_EPIL]], <8 x float>* [[TMP40]], align 4
-; AUTO_VEC-NEXT:    [[TMP41:%.*]] = getelementptr inbounds float, float* [[TMP37]], i64 16
-; AUTO_VEC-NEXT:    [[TMP42:%.*]] = bitcast float* [[TMP41]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2_EPIL]], <8 x float>* [[TMP42]], align 4
-; AUTO_VEC-NEXT:    [[TMP43:%.*]] = getelementptr inbounds float, float* [[TMP37]], i64 24
-; AUTO_VEC-NEXT:    [[TMP44:%.*]] = bitcast float* [[TMP43]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3_EPIL]], <8 x float>* [[TMP44]], align 4
+; AUTO_VEC-NEXT:    [[TMP37:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX_EPIL]]
+; AUTO_VEC-NEXT:    store <8 x float> [[VEC_IND_EPIL]], ptr [[TMP37]], align 4
+; AUTO_VEC-NEXT:    [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP37]], i64 8
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD_EPIL]], ptr [[TMP39]], align 4
+; AUTO_VEC-NEXT:    [[TMP41:%.*]] = getelementptr inbounds float, ptr [[TMP37]], i64 16
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2_EPIL]], ptr [[TMP41]], align 4
+; AUTO_VEC-NEXT:    [[TMP43:%.*]] = getelementptr inbounds float, ptr [[TMP37]], i64 24
+; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3_EPIL]], ptr [[TMP43]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 32
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_EPIL]] = fadd fast <8 x float> [[VEC_IND_EPIL]], <float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01>
 ; AUTO_VEC-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
@@ -147,8 +127,8 @@ define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC:       for.body:
 ; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
 ; AUTO_VEC-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
-; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; AUTO_VEC-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
+; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; AUTO_VEC-NEXT:    [[TMP45:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
@@ -166,8 +146,8 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  store float %x.06, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  store float %x.06, ptr %arrayidx, align 4
   %conv1 = fadd fast float %x.06, 5.000000e-01
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -183,7 +163,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 
 ; The same as the previous, but FP operation has no FMF.
 ; Vectorization should be rejected.
-;void fp_iv_loop2(float * __restrict__ A, int N) {
+;void fp_iv_loop2(ptr __restrict__ A, int N) {
 ;  float x = 1.0;
 ;  for (int i=0; i < N; ++i) {
 ;    A[i] = x;
@@ -191,7 +171,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ;  }
 ;}
 
-define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) {
+define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) {
 ; AUTO_VEC-LABEL: @fp_iv_loop2(
 ; AUTO_VEC-NEXT:  entry:
 ; AUTO_VEC-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -208,36 +188,36 @@ define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) {
 ; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[X_06:%.*]] = phi float [ 1.000000e+00, [[FOR_BODY_PREHEADER_NEW]] ], [ [[CONV1_7:%.*]], [[FOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDVARS_IV]]
-; AUTO_VEC-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
+; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1:%.*]] = fadd float [[X_06]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
-; AUTO_VEC-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]]
-; AUTO_VEC-NEXT:    store float [[CONV1]], float* [[ARRAYIDX_1]], align 4
+; AUTO_VEC-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; AUTO_VEC-NEXT:    store float [[CONV1]], ptr [[ARRAYIDX_1]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_1:%.*]] = fadd float [[CONV1]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2
-; AUTO_VEC-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; AUTO_VEC-NEXT:    store float [[CONV1_1]], float* [[ARRAYIDX_2]], align 4
+; AUTO_VEC-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; AUTO_VEC-NEXT:    store float [[CONV1_1]], ptr [[ARRAYIDX_2]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_2:%.*]] = fadd float [[CONV1_1]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3
-; AUTO_VEC-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; AUTO_VEC-NEXT:    store float [[CONV1_2]], float* [[ARRAYIDX_3]], align 4
+; AUTO_VEC-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; AUTO_VEC-NEXT:    store float [[CONV1_2]], ptr [[ARRAYIDX_3]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_3:%.*]] = fadd float [[CONV1_2]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = or i64 [[INDVARS_IV]], 4
-; AUTO_VEC-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; AUTO_VEC-NEXT:    store float [[CONV1_3]], float* [[ARRAYIDX_4]], align 4
+; AUTO_VEC-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; AUTO_VEC-NEXT:    store float [[CONV1_3]], ptr [[ARRAYIDX_4]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_4:%.*]] = fadd float [[CONV1_3]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = or i64 [[INDVARS_IV]], 5
-; AUTO_VEC-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; AUTO_VEC-NEXT:    store float [[CONV1_4]], float* [[ARRAYIDX_5]], align 4
+; AUTO_VEC-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; AUTO_VEC-NEXT:    store float [[CONV1_4]], ptr [[ARRAYIDX_5]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_5:%.*]] = fadd float [[CONV1_4]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = or i64 [[INDVARS_IV]], 6
-; AUTO_VEC-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; AUTO_VEC-NEXT:    store float [[CONV1_5]], float* [[ARRAYIDX_6]], align 4
+; AUTO_VEC-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; AUTO_VEC-NEXT:    store float [[CONV1_5]], ptr [[ARRAYIDX_6]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_6:%.*]] = fadd float [[CONV1_5]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = or i64 [[INDVARS_IV]], 7
-; AUTO_VEC-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; AUTO_VEC-NEXT:    store float [[CONV1_6]], float* [[ARRAYIDX_7]], align 4
+; AUTO_VEC-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; AUTO_VEC-NEXT:    store float [[CONV1_6]], ptr [[ARRAYIDX_7]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_7]] = fadd float [[CONV1_6]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
 ; AUTO_VEC-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
@@ -252,8 +232,8 @@ define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) {
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[INDVARS_IV_UNR]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[X_06_UNR]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ], [ 0, [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
-; AUTO_VEC-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_EPIL]]
-; AUTO_VEC-NEXT:    store float [[X_06_EPIL]], float* [[ARRAYIDX_EPIL]], align 4
+; AUTO_VEC-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
+; AUTO_VEC-NEXT:    store float [[X_06_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_EPIL]] = fadd float [[X_06_EPIL]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
 ; AUTO_VEC-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
@@ -272,8 +252,8 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  store float %x.06, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  store float %x.06, ptr %arrayidx, align 4
   %conv1 = fadd float %x.06, 5.000000e-01
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -287,7 +267,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
   ret void
 }
 
-define double @external_use_with_fast_math(double* %a, i64 %n) {
+define double @external_use_with_fast_math(ptr %a, i64 %n) {
 ; AUTO_VEC-LABEL: @external_use_with_fast_math(
 ; AUTO_VEC-NEXT:  entry:
 ; AUTO_VEC-NEXT:    [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
@@ -313,69 +293,53 @@ define double @external_use_with_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.200000e+01, double 1.200000e+01, double 1.200000e+01, double 1.200000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
-; AUTO_VEC-NEXT:    [[TMP5:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[INDEX]]
-; AUTO_VEC-NEXT:    [[TMP6:%.*]] = bitcast double* [[TMP5]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[VEC_IND]], <4 x double>* [[TMP6]], align 8
-; AUTO_VEC-NEXT:    [[TMP7:%.*]] = getelementptr double, double* [[TMP5]], i64 4
-; AUTO_VEC-NEXT:    [[TMP8:%.*]] = bitcast double* [[TMP7]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD]], <4 x double>* [[TMP8]], align 8
-; AUTO_VEC-NEXT:    [[TMP9:%.*]] = getelementptr double, double* [[TMP5]], i64 8
-; AUTO_VEC-NEXT:    [[TMP10:%.*]] = bitcast double* [[TMP9]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD2]], <4 x double>* [[TMP10]], align 8
-; AUTO_VEC-NEXT:    [[TMP11:%.*]] = getelementptr double, double* [[TMP5]], i64 12
-; AUTO_VEC-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3]], <4 x double>* [[TMP12]], align 8
+; AUTO_VEC-NEXT:    [[TMP5:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]]
+; AUTO_VEC-NEXT:    store <4 x double> [[VEC_IND]], ptr [[TMP5]], align 8
+; AUTO_VEC-NEXT:    [[TMP7:%.*]] = getelementptr double, ptr [[TMP5]], i64 4
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD]], ptr [[TMP7]], align 8
+; AUTO_VEC-NEXT:    [[TMP9:%.*]] = getelementptr double, ptr [[TMP5]], i64 8
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD2]], ptr [[TMP9]], align 8
+; AUTO_VEC-NEXT:    [[TMP11:%.*]] = getelementptr double, ptr [[TMP5]], i64 12
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3]], ptr [[TMP11]], align 8
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 16
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 4.800000e+01, double 4.800000e+01, double 4.800000e+01, double 4.800000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 6.000000e+01, double 6.000000e+01, double 6.000000e+01, double 6.000000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 7.200000e+01, double 7.200000e+01, double 7.200000e+01, double 7.200000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 8.400000e+01, double 8.400000e+01, double 8.400000e+01, double 8.400000e+01>
-; AUTO_VEC-NEXT:    [[TMP13:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT]]
-; AUTO_VEC-NEXT:    [[TMP14:%.*]] = bitcast double* [[TMP13]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[VEC_IND_NEXT]], <4 x double>* [[TMP14]], align 8
-; AUTO_VEC-NEXT:    [[TMP15:%.*]] = getelementptr double, double* [[TMP13]], i64 4
-; AUTO_VEC-NEXT:    [[TMP16:%.*]] = bitcast double* [[TMP15]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD_1]], <4 x double>* [[TMP16]], align 8
-; AUTO_VEC-NEXT:    [[TMP17:%.*]] = getelementptr double, double* [[TMP13]], i64 8
-; AUTO_VEC-NEXT:    [[TMP18:%.*]] = bitcast double* [[TMP17]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD2_1]], <4 x double>* [[TMP18]], align 8
-; AUTO_VEC-NEXT:    [[TMP19:%.*]] = getelementptr double, double* [[TMP13]], i64 12
-; AUTO_VEC-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP19]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3_1]], <4 x double>* [[TMP20]], align 8
+; AUTO_VEC-NEXT:    [[TMP13:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX_NEXT]]
+; AUTO_VEC-NEXT:    store <4 x double> [[VEC_IND_NEXT]], ptr [[TMP13]], align 8
+; AUTO_VEC-NEXT:    [[TMP15:%.*]] = getelementptr double, ptr [[TMP13]], i64 4
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD_1]], ptr [[TMP15]], align 8
+; AUTO_VEC-NEXT:    [[TMP17:%.*]] = getelementptr double, ptr [[TMP13]], i64 8
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD2_1]], ptr [[TMP17]], align 8
+; AUTO_VEC-NEXT:    [[TMP19:%.*]] = getelementptr double, ptr [[TMP13]], i64 12
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3_1]], ptr [[TMP19]], align 8
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 32
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 9.600000e+01, double 9.600000e+01, double 9.600000e+01, double 9.600000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.080000e+02, double 1.080000e+02, double 1.080000e+02, double 1.080000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.200000e+02, double 1.200000e+02, double 1.200000e+02, double 1.200000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.320000e+02, double 1.320000e+02, double 1.320000e+02, double 1.320000e+02>
-; AUTO_VEC-NEXT:    [[TMP21:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT_1]]
-; AUTO_VEC-NEXT:    [[TMP22:%.*]] = bitcast double* [[TMP21]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[VEC_IND_NEXT_1]], <4 x double>* [[TMP22]], align 8
-; AUTO_VEC-NEXT:    [[TMP23:%.*]] = getelementptr double, double* [[TMP21]], i64 4
-; AUTO_VEC-NEXT:    [[TMP24:%.*]] = bitcast double* [[TMP23]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD_2]], <4 x double>* [[TMP24]], align 8
-; AUTO_VEC-NEXT:    [[TMP25:%.*]] = getelementptr double, double* [[TMP21]], i64 8
-; AUTO_VEC-NEXT:    [[TMP26:%.*]] = bitcast double* [[TMP25]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD2_2]], <4 x double>* [[TMP26]], align 8
-; AUTO_VEC-NEXT:    [[TMP27:%.*]] = getelementptr double, double* [[TMP21]], i64 12
-; AUTO_VEC-NEXT:    [[TMP28:%.*]] = bitcast double* [[TMP27]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3_2]], <4 x double>* [[TMP28]], align 8
+; AUTO_VEC-NEXT:    [[TMP21:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX_NEXT_1]]
+; AUTO_VEC-NEXT:    store <4 x double> [[VEC_IND_NEXT_1]], ptr [[TMP21]], align 8
+; AUTO_VEC-NEXT:    [[TMP23:%.*]] = getelementptr double, ptr [[TMP21]], i64 4
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD_2]], ptr [[TMP23]], align 8
+; AUTO_VEC-NEXT:    [[TMP25:%.*]] = getelementptr double, ptr [[TMP21]], i64 8
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD2_2]], ptr [[TMP25]], align 8
+; AUTO_VEC-NEXT:    [[TMP27:%.*]] = getelementptr double, ptr [[TMP21]], i64 12
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3_2]], ptr [[TMP27]], align 8
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 48
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.440000e+02, double 1.440000e+02, double 1.440000e+02, double 1.440000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.560000e+02, double 1.560000e+02, double 1.560000e+02, double 1.560000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.680000e+02, double 1.680000e+02, double 1.680000e+02, double 1.680000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.800000e+02, double 1.800000e+02, double 1.800000e+02, double 1.800000e+02>
-; AUTO_VEC-NEXT:    [[TMP29:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT_2]]
-; AUTO_VEC-NEXT:    [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[VEC_IND_NEXT_2]], <4 x double>* [[TMP30]], align 8
-; AUTO_VEC-NEXT:    [[TMP31:%.*]] = getelementptr double, double* [[TMP29]], i64 4
-; AUTO_VEC-NEXT:    [[TMP32:%.*]] = bitcast double* [[TMP31]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD_3]], <4 x double>* [[TMP32]], align 8
-; AUTO_VEC-NEXT:    [[TMP33:%.*]] = getelementptr double, double* [[TMP29]], i64 8
-; AUTO_VEC-NEXT:    [[TMP34:%.*]] = bitcast double* [[TMP33]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD2_3]], <4 x double>* [[TMP34]], align 8
-; AUTO_VEC-NEXT:    [[TMP35:%.*]] = getelementptr double, double* [[TMP29]], i64 12
-; AUTO_VEC-NEXT:    [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3_3]], <4 x double>* [[TMP36]], align 8
+; AUTO_VEC-NEXT:    [[TMP29:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX_NEXT_2]]
+; AUTO_VEC-NEXT:    store <4 x double> [[VEC_IND_NEXT_2]], ptr [[TMP29]], align 8
+; AUTO_VEC-NEXT:    [[TMP31:%.*]] = getelementptr double, ptr [[TMP29]], i64 4
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD_3]], ptr [[TMP31]], align 8
+; AUTO_VEC-NEXT:    [[TMP33:%.*]] = getelementptr double, ptr [[TMP29]], i64 8
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD2_3]], ptr [[TMP33]], align 8
+; AUTO_VEC-NEXT:    [[TMP35:%.*]] = getelementptr double, ptr [[TMP29]], i64 12
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3_3]], ptr [[TMP35]], align 8
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_3]] = add nuw i64 [[INDEX]], 64
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_3]] = fadd fast <4 x double> [[VEC_IND]], <double 1.920000e+02, double 1.920000e+02, double 1.920000e+02, double 1.920000e+02>
 ; AUTO_VEC-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
@@ -393,18 +357,14 @@ define double @external_use_with_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC-NEXT:    [[STEP_ADD_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], <double 1.200000e+01, double 1.200000e+01, double 1.200000e+01, double 1.200000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
-; AUTO_VEC-NEXT:    [[TMP37:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_EPIL]]
-; AUTO_VEC-NEXT:    [[TMP38:%.*]] = bitcast double* [[TMP37]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[VEC_IND_EPIL]], <4 x double>* [[TMP38]], align 8
-; AUTO_VEC-NEXT:    [[TMP39:%.*]] = getelementptr double, double* [[TMP37]], i64 4
-; AUTO_VEC-NEXT:    [[TMP40:%.*]] = bitcast double* [[TMP39]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD_EPIL]], <4 x double>* [[TMP40]], align 8
-; AUTO_VEC-NEXT:    [[TMP41:%.*]] = getelementptr double, double* [[TMP37]], i64 8
-; AUTO_VEC-NEXT:    [[TMP42:%.*]] = bitcast double* [[TMP41]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD2_EPIL]], <4 x double>* [[TMP42]], align 8
-; AUTO_VEC-NEXT:    [[TMP43:%.*]] = getelementptr double, double* [[TMP37]], i64 12
-; AUTO_VEC-NEXT:    [[TMP44:%.*]] = bitcast double* [[TMP43]] to <4 x double>*
-; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3_EPIL]], <4 x double>* [[TMP44]], align 8
+; AUTO_VEC-NEXT:    [[TMP37:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX_EPIL]]
+; AUTO_VEC-NEXT:    store <4 x double> [[VEC_IND_EPIL]], ptr [[TMP37]], align 8
+; AUTO_VEC-NEXT:    [[TMP39:%.*]] = getelementptr double, ptr [[TMP37]], i64 4
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD_EPIL]], ptr [[TMP39]], align 8
+; AUTO_VEC-NEXT:    [[TMP41:%.*]] = getelementptr double, ptr [[TMP37]], i64 8
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD2_EPIL]], ptr [[TMP41]], align 8
+; AUTO_VEC-NEXT:    [[TMP43:%.*]] = getelementptr double, ptr [[TMP37]], i64 12
+; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3_EPIL]], ptr [[TMP43]], align 8
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 16
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_EPIL]] = fadd fast <4 x double> [[VEC_IND_EPIL]], <double 4.800000e+01, double 4.800000e+01, double 4.800000e+01, double 4.800000e+01>
 ; AUTO_VEC-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
@@ -419,8 +379,8 @@ define double @external_use_with_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC:       for.body:
 ; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
 ; AUTO_VEC-NEXT:    [[J:%.*]] = phi double [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ 0.000000e+00, [[ENTRY]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
-; AUTO_VEC-NEXT:    [[T0:%.*]] = getelementptr double, double* [[A]], i64 [[I]]
-; AUTO_VEC-NEXT:    store double [[J]], double* [[T0]], align 8
+; AUTO_VEC-NEXT:    [[T0:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
+; AUTO_VEC-NEXT:    store double [[J]], ptr [[T0]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; AUTO_VEC-NEXT:    [[J_NEXT]] = fadd fast double [[J]], 3.000000e+00
 ; AUTO_VEC-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[SMAX]]
@@ -435,8 +395,8 @@ entry:
 for.body:
   %i = phi i64 [ 0, %entry ], [%i.next, %for.body]
   %j = phi double [ 0.0, %entry ], [ %j.next, %for.body ]
-  %t0 = getelementptr double, double* %a, i64 %i
-  store double %j, double* %t0
+  %t0 = getelementptr double, ptr %a, i64 %i
+  store double %j, ptr %t0
   %i.next = add i64 %i, 1
   %j.next = fadd fast double %j, 3.0
   %cond = icmp slt i64 %i.next, %n
@@ -447,7 +407,7 @@ for.end:
   ret double %t1
 }
 
-define double @external_use_without_fast_math(double* %a, i64 %n) {
+define double @external_use_without_fast_math(ptr %a, i64 %n) {
 ; AUTO_VEC-LABEL: @external_use_without_fast_math(
 ; AUTO_VEC-NEXT:  entry:
 ; AUTO_VEC-NEXT:    [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
@@ -462,36 +422,36 @@ define double @external_use_without_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[I_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[J:%.*]] = phi double [ 0.000000e+00, [[ENTRY_NEW]] ], [ [[J_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[T0:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[I]]
-; AUTO_VEC-NEXT:    store double [[J]], double* [[T0]], align 8
+; AUTO_VEC-NEXT:    [[T0:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[I]]
+; AUTO_VEC-NEXT:    store double [[J]], ptr [[T0]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT:%.*]] = or i64 [[I]], 1
 ; AUTO_VEC-NEXT:    [[J_NEXT:%.*]] = fadd double [[J]], 3.000000e+00
-; AUTO_VEC-NEXT:    [[T0_1:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT]]
-; AUTO_VEC-NEXT:    store double [[J_NEXT]], double* [[T0_1]], align 8
+; AUTO_VEC-NEXT:    [[T0_1:%.*]] = getelementptr double, ptr [[A]], i64 [[I_NEXT]]
+; AUTO_VEC-NEXT:    store double [[J_NEXT]], ptr [[T0_1]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_1:%.*]] = or i64 [[I]], 2
 ; AUTO_VEC-NEXT:    [[J_NEXT_1:%.*]] = fadd double [[J_NEXT]], 3.000000e+00
-; AUTO_VEC-NEXT:    [[T0_2:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_1]]
-; AUTO_VEC-NEXT:    store double [[J_NEXT_1]], double* [[T0_2]], align 8
+; AUTO_VEC-NEXT:    [[T0_2:%.*]] = getelementptr double, ptr [[A]], i64 [[I_NEXT_1]]
+; AUTO_VEC-NEXT:    store double [[J_NEXT_1]], ptr [[T0_2]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_2:%.*]] = or i64 [[I]], 3
 ; AUTO_VEC-NEXT:    [[J_NEXT_2:%.*]] = fadd double [[J_NEXT_1]], 3.000000e+00
-; AUTO_VEC-NEXT:    [[T0_3:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_2]]
-; AUTO_VEC-NEXT:    store double [[J_NEXT_2]], double* [[T0_3]], align 8
+; AUTO_VEC-NEXT:    [[T0_3:%.*]] = getelementptr double, ptr [[A]], i64 [[I_NEXT_2]]
+; AUTO_VEC-NEXT:    store double [[J_NEXT_2]], ptr [[T0_3]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_3:%.*]] = or i64 [[I]], 4
 ; AUTO_VEC-NEXT:    [[J_NEXT_3:%.*]] = fadd double [[J_NEXT_2]], 3.000000e+00
-; AUTO_VEC-NEXT:    [[T0_4:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_3]]
-; AUTO_VEC-NEXT:    store double [[J_NEXT_3]], double* [[T0_4]], align 8
+; AUTO_VEC-NEXT:    [[T0_4:%.*]] = getelementptr double, ptr [[A]], i64 [[I_NEXT_3]]
+; AUTO_VEC-NEXT:    store double [[J_NEXT_3]], ptr [[T0_4]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_4:%.*]] = or i64 [[I]], 5
 ; AUTO_VEC-NEXT:    [[J_NEXT_4:%.*]] = fadd double [[J_NEXT_3]], 3.000000e+00
-; AUTO_VEC-NEXT:    [[T0_5:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_4]]
-; AUTO_VEC-NEXT:    store double [[J_NEXT_4]], double* [[T0_5]], align 8
+; AUTO_VEC-NEXT:    [[T0_5:%.*]] = getelementptr double, ptr [[A]], i64 [[I_NEXT_4]]
+; AUTO_VEC-NEXT:    store double [[J_NEXT_4]], ptr [[T0_5]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_5:%.*]] = or i64 [[I]], 6
 ; AUTO_VEC-NEXT:    [[J_NEXT_5:%.*]] = fadd double [[J_NEXT_4]], 3.000000e+00
-; AUTO_VEC-NEXT:    [[T0_6:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_5]]
-; AUTO_VEC-NEXT:    store double [[J_NEXT_5]], double* [[T0_6]], align 8
+; AUTO_VEC-NEXT:    [[T0_6:%.*]] = getelementptr double, ptr [[A]], i64 [[I_NEXT_5]]
+; AUTO_VEC-NEXT:    store double [[J_NEXT_5]], ptr [[T0_6]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_6:%.*]] = or i64 [[I]], 7
 ; AUTO_VEC-NEXT:    [[J_NEXT_6:%.*]] = fadd double [[J_NEXT_5]], 3.000000e+00
-; AUTO_VEC-NEXT:    [[T0_7:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_6]]
-; AUTO_VEC-NEXT:    store double [[J_NEXT_6]], double* [[T0_7]], align 8
+; AUTO_VEC-NEXT:    [[T0_7:%.*]] = getelementptr double, ptr [[A]], i64 [[I_NEXT_6]]
+; AUTO_VEC-NEXT:    store double [[J_NEXT_6]], ptr [[T0_7]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_7]] = add nuw nsw i64 [[I]], 8
 ; AUTO_VEC-NEXT:    [[J_NEXT_7]] = fadd double [[J_NEXT_6]], 3.000000e+00
 ; AUTO_VEC-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
@@ -507,8 +467,8 @@ define double @external_use_without_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC-NEXT:    [[I_EPIL:%.*]] = phi i64 [ [[I_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[I_UNR]], [[FOR_END_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[J_EPIL:%.*]] = phi double [ [[J_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[J_UNR]], [[FOR_END_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ], [ 0, [[FOR_END_UNR_LCSSA]] ]
-; AUTO_VEC-NEXT:    [[T0_EPIL:%.*]] = getelementptr double, double* [[A]], i64 [[I_EPIL]]
-; AUTO_VEC-NEXT:    store double [[J_EPIL]], double* [[T0_EPIL]], align 8
+; AUTO_VEC-NEXT:    [[T0_EPIL:%.*]] = getelementptr double, ptr [[A]], i64 [[I_EPIL]]
+; AUTO_VEC-NEXT:    store double [[J_EPIL]], ptr [[T0_EPIL]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_EPIL]] = add nuw nsw i64 [[I_EPIL]], 1
 ; AUTO_VEC-NEXT:    [[J_NEXT_EPIL]] = fadd double [[J_EPIL]], 3.000000e+00
 ; AUTO_VEC-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
@@ -524,8 +484,8 @@ entry:
 for.body:
   %i = phi i64 [ 0, %entry ], [%i.next, %for.body]
   %j = phi double [ 0.0, %entry ], [ %j.next, %for.body ]
-  %t0 = getelementptr double, double* %a, i64 %i
-  store double %j, double* %t0
+  %t0 = getelementptr double, ptr %a, i64 %i
+  store double %j, ptr %t0
   %i.next = add i64 %i, 1
   %j.next = fadd double %j, 3.0
   %cond = icmp slt i64 %i.next, %n
@@ -536,7 +496,7 @@ for.end:
   ret double %t1
 }
 
-;;  void fadd_induction(float *p, unsigned N) {
+;;  void fadd_induction(ptr p, unsigned N) {
 ;;    float x = 1.0f;
 ;;    for (unsigned i=0; i!=N; ++i) {
 ;;      p[i] = p[i] + x;
@@ -544,7 +504,7 @@ for.end:
 ;;    }
 ;;  }
 
-define void @fadd_reassoc_FMF(float* nocapture %p, i32 %N) {
+define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-LABEL: @fadd_reassoc_FMF(
 ; AUTO_VEC-NEXT:  entry:
 ; AUTO_VEC-NEXT:    [[CMP_NOT11:%.*]] = icmp eq i32 [[N:%.*]], 0
@@ -574,59 +534,43 @@ define void @fadd_reassoc_FMF(float* nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
-; AUTO_VEC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 [[INDEX]]
-; AUTO_VEC-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x float>, <8 x float>* [[TMP7]], align 4
-; AUTO_VEC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 8
-; AUTO_VEC-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD5:%.*]] = load <8 x float>, <8 x float>* [[TMP9]], align 4
-; AUTO_VEC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 16
-; AUTO_VEC-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP10]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x float>, <8 x float>* [[TMP11]], align 4
-; AUTO_VEC-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 24
-; AUTO_VEC-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP12]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD7:%.*]] = load <8 x float>, <8 x float>* [[TMP13]], align 4
+; AUTO_VEC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 [[INDEX]]
+; AUTO_VEC-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP6]], align 4
+; AUTO_VEC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 8
+; AUTO_VEC-NEXT:    [[WIDE_LOAD5:%.*]] = load <8 x float>, ptr [[TMP8]], align 4
+; AUTO_VEC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 16
+; AUTO_VEC-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x float>, ptr [[TMP10]], align 4
+; AUTO_VEC-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 24
+; AUTO_VEC-NEXT:    [[WIDE_LOAD7:%.*]] = load <8 x float>, ptr [[TMP12]], align 4
 ; AUTO_VEC-NEXT:    [[TMP14:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], [[WIDE_LOAD]]
 ; AUTO_VEC-NEXT:    [[TMP15:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], [[WIDE_LOAD5]]
 ; AUTO_VEC-NEXT:    [[TMP16:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], [[WIDE_LOAD6]]
 ; AUTO_VEC-NEXT:    [[TMP17:%.*]] = fadd reassoc <8 x float> [[STEP_ADD3]], [[WIDE_LOAD7]]
-; AUTO_VEC-NEXT:    [[TMP18:%.*]] = bitcast float* [[TMP6]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP14]], <8 x float>* [[TMP18]], align 4
-; AUTO_VEC-NEXT:    [[TMP19:%.*]] = bitcast float* [[TMP8]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP15]], <8 x float>* [[TMP19]], align 4
-; AUTO_VEC-NEXT:    [[TMP20:%.*]] = bitcast float* [[TMP10]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP16]], <8 x float>* [[TMP20]], align 4
-; AUTO_VEC-NEXT:    [[TMP21:%.*]] = bitcast float* [[TMP12]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP17]], <8 x float>* [[TMP21]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP14]], ptr [[TMP6]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP15]], ptr [[TMP8]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP16]], ptr [[TMP10]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP17]], ptr [[TMP12]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 32
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT:%.*]] = fadd reassoc <8 x float> [[STEP_ADD3]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD_1:%.*]] = fadd reassoc <8 x float> [[VEC_IND_NEXT]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_1:%.*]] = fadd reassoc <8 x float> [[STEP_ADD_1]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_1:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2_1]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
-; AUTO_VEC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[P]], i64 [[INDEX_NEXT]]
-; AUTO_VEC-NEXT:    [[TMP23:%.*]] = bitcast float* [[TMP22]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD_1:%.*]] = load <8 x float>, <8 x float>* [[TMP23]], align 4
-; AUTO_VEC-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 8
-; AUTO_VEC-NEXT:    [[TMP25:%.*]] = bitcast float* [[TMP24]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD5_1:%.*]] = load <8 x float>, <8 x float>* [[TMP25]], align 4
-; AUTO_VEC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 16
-; AUTO_VEC-NEXT:    [[TMP27:%.*]] = bitcast float* [[TMP26]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD6_1:%.*]] = load <8 x float>, <8 x float>* [[TMP27]], align 4
-; AUTO_VEC-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 24
-; AUTO_VEC-NEXT:    [[TMP29:%.*]] = bitcast float* [[TMP28]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD7_1:%.*]] = load <8 x float>, <8 x float>* [[TMP29]], align 4
+; AUTO_VEC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDEX_NEXT]]
+; AUTO_VEC-NEXT:    [[WIDE_LOAD_1:%.*]] = load <8 x float>, ptr [[TMP22]], align 4
+; AUTO_VEC-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 8
+; AUTO_VEC-NEXT:    [[WIDE_LOAD5_1:%.*]] = load <8 x float>, ptr [[TMP24]], align 4
+; AUTO_VEC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 16
+; AUTO_VEC-NEXT:    [[WIDE_LOAD6_1:%.*]] = load <8 x float>, ptr [[TMP26]], align 4
+; AUTO_VEC-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 24
+; AUTO_VEC-NEXT:    [[WIDE_LOAD7_1:%.*]] = load <8 x float>, ptr [[TMP28]], align 4
 ; AUTO_VEC-NEXT:    [[TMP30:%.*]] = fadd reassoc <8 x float> [[VEC_IND_NEXT]], [[WIDE_LOAD_1]]
 ; AUTO_VEC-NEXT:    [[TMP31:%.*]] = fadd reassoc <8 x float> [[STEP_ADD_1]], [[WIDE_LOAD5_1]]
 ; AUTO_VEC-NEXT:    [[TMP32:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2_1]], [[WIDE_LOAD6_1]]
 ; AUTO_VEC-NEXT:    [[TMP33:%.*]] = fadd reassoc <8 x float> [[STEP_ADD3_1]], [[WIDE_LOAD7_1]]
-; AUTO_VEC-NEXT:    [[TMP34:%.*]] = bitcast float* [[TMP22]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP30]], <8 x float>* [[TMP34]], align 4
-; AUTO_VEC-NEXT:    [[TMP35:%.*]] = bitcast float* [[TMP24]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP31]], <8 x float>* [[TMP35]], align 4
-; AUTO_VEC-NEXT:    [[TMP36:%.*]] = bitcast float* [[TMP26]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP32]], <8 x float>* [[TMP36]], align 4
-; AUTO_VEC-NEXT:    [[TMP37:%.*]] = bitcast float* [[TMP28]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP33]], <8 x float>* [[TMP37]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP30]], ptr [[TMP22]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP31]], ptr [[TMP24]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP32]], ptr [[TMP26]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP33]], ptr [[TMP28]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_1]] = add nuw i64 [[INDEX]], 64
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_1]] = fadd reassoc <8 x float> [[STEP_ADD3_1]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
 ; AUTO_VEC-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
@@ -641,30 +585,22 @@ define void @fadd_reassoc_FMF(float* nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:    [[STEP_ADD_EPIL:%.*]] = fadd reassoc <8 x float> [[VEC_IND_UNR]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_EPIL:%.*]] = fadd reassoc <8 x float> [[STEP_ADD_EPIL]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_EPIL:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2_EPIL]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
-; AUTO_VEC-NEXT:    [[TMP38:%.*]] = getelementptr inbounds float, float* [[P]], i64 [[INDEX_UNR]]
-; AUTO_VEC-NEXT:    [[TMP39:%.*]] = bitcast float* [[TMP38]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD_EPIL:%.*]] = load <8 x float>, <8 x float>* [[TMP39]], align 4
-; AUTO_VEC-NEXT:    [[TMP40:%.*]] = getelementptr inbounds float, float* [[TMP38]], i64 8
-; AUTO_VEC-NEXT:    [[TMP41:%.*]] = bitcast float* [[TMP40]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD5_EPIL:%.*]] = load <8 x float>, <8 x float>* [[TMP41]], align 4
-; AUTO_VEC-NEXT:    [[TMP42:%.*]] = getelementptr inbounds float, float* [[TMP38]], i64 16
-; AUTO_VEC-NEXT:    [[TMP43:%.*]] = bitcast float* [[TMP42]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD6_EPIL:%.*]] = load <8 x float>, <8 x float>* [[TMP43]], align 4
-; AUTO_VEC-NEXT:    [[TMP44:%.*]] = getelementptr inbounds float, float* [[TMP38]], i64 24
-; AUTO_VEC-NEXT:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to <8 x float>*
-; AUTO_VEC-NEXT:    [[WIDE_LOAD7_EPIL:%.*]] = load <8 x float>, <8 x float>* [[TMP45]], align 4
+; AUTO_VEC-NEXT:    [[TMP38:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDEX_UNR]]
+; AUTO_VEC-NEXT:    [[WIDE_LOAD_EPIL:%.*]] = load <8 x float>, ptr [[TMP38]], align 4
+; AUTO_VEC-NEXT:    [[TMP40:%.*]] = getelementptr inbounds float, ptr [[TMP38]], i64 8
+; AUTO_VEC-NEXT:    [[WIDE_LOAD5_EPIL:%.*]] = load <8 x float>, ptr [[TMP40]], align 4
+; AUTO_VEC-NEXT:    [[TMP42:%.*]] = getelementptr inbounds float, ptr [[TMP38]], i64 16
+; AUTO_VEC-NEXT:    [[WIDE_LOAD6_EPIL:%.*]] = load <8 x float>, ptr [[TMP42]], align 4
+; AUTO_VEC-NEXT:    [[TMP44:%.*]] = getelementptr inbounds float, ptr [[TMP38]], i64 24
+; AUTO_VEC-NEXT:    [[WIDE_LOAD7_EPIL:%.*]] = load <8 x float>, ptr [[TMP44]], align 4
 ; AUTO_VEC-NEXT:    [[TMP46:%.*]] = fadd reassoc <8 x float> [[VEC_IND_UNR]], [[WIDE_LOAD_EPIL]]
 ; AUTO_VEC-NEXT:    [[TMP47:%.*]] = fadd reassoc <8 x float> [[STEP_ADD_EPIL]], [[WIDE_LOAD5_EPIL]]
 ; AUTO_VEC-NEXT:    [[TMP48:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2_EPIL]], [[WIDE_LOAD6_EPIL]]
 ; AUTO_VEC-NEXT:    [[TMP49:%.*]] = fadd reassoc <8 x float> [[STEP_ADD3_EPIL]], [[WIDE_LOAD7_EPIL]]
-; AUTO_VEC-NEXT:    [[TMP50:%.*]] = bitcast float* [[TMP38]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP46]], <8 x float>* [[TMP50]], align 4
-; AUTO_VEC-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP40]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP47]], <8 x float>* [[TMP51]], align 4
-; AUTO_VEC-NEXT:    [[TMP52:%.*]] = bitcast float* [[TMP42]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP48]], <8 x float>* [[TMP52]], align 4
-; AUTO_VEC-NEXT:    [[TMP53:%.*]] = bitcast float* [[TMP44]] to <8 x float>*
-; AUTO_VEC-NEXT:    store <8 x float> [[TMP49]], <8 x float>* [[TMP53]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP46]], ptr [[TMP38]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP47]], ptr [[TMP40]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP48]], ptr [[TMP42]], align 4
+; AUTO_VEC-NEXT:    store <8 x float> [[TMP49]], ptr [[TMP44]], align 4
 ; AUTO_VEC-NEXT:    br label [[MIDDLE_BLOCK]]
 ; AUTO_VEC:       middle.block:
 ; AUTO_VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
@@ -674,10 +610,10 @@ define void @fadd_reassoc_FMF(float* nocapture %p, i32 %N) {
 ; AUTO_VEC:       for.body:
 ; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
 ; AUTO_VEC-NEXT:    [[X_012:%.*]] = phi float [ [[ADD3:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
-; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[P]], i64 [[INDVARS_IV]]
-; AUTO_VEC-NEXT:    [[TMP54:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT:    [[TMP54:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[ADD:%.*]] = fadd reassoc float [[X_012]], [[TMP54]]
-; AUTO_VEC-NEXT:    store float [[ADD]], float* [[ARRAYIDX]], align 4
+; AUTO_VEC-NEXT:    store float [[ADD]], ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[ADD3]] = fadd reassoc float [[X_012]], 4.200000e+01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; AUTO_VEC-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
@@ -697,10 +633,10 @@ for.cond.cleanup:
 for.body:
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
   %x.012 = phi float [ 1.000000e+00, %for.body.preheader ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %p, i64 %indvars.iv
-  %1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %p, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx, align 4
   %add = fadd reassoc float %x.012, %1
-  store float %add, float* %arrayidx, align 4
+  store float %add, ptr %arrayidx, align 4
   %add3 = fadd reassoc float %x.012, 4.200000e+01
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %cmp.not = icmp eq i64 %indvars.iv.next, %0

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll
index 9031b8703ea90..dae341bcf5380 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll
@@ -8,15 +8,15 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %neg = fneg float %{{.*}}
 ; CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %neg = fneg float %{{.*}}
 ; CHECK: Found an estimated cost of 1 for VF 4 For instruction:   %neg = fneg float %{{.*}}
-define void @fneg_cost(float* %a, i64 %n) {
+define void @fneg_cost(ptr %a, i64 %n) {
 entry:
   br label %for.body
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %neg = fneg float %0
-  store float %neg, float* %arrayidx, align 4
+  store float %neg, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %cmp = icmp eq i64 %indvars.iv.next, %n
   br i1 %cmp, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
index 8b7d51e825af1..4d92c1a3cf424 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
@@ -20,11 +20,11 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds [10000 x float], [10000 x float]* @float_array, i64 0, i64 %indvars.iv
-  %1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [10000 x float], ptr @float_array, i64 0, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx, align 4
   %conv = fptoui float %1 to i32
-  %arrayidx2 = getelementptr inbounds [10000 x i32], [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv
-  store i32 %conv, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds [10000 x i32], ptr @unsigned_array, i64 0, i64 %indvars.iv
+  store i32 %conv, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %N

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
index b54ec8a86d6c2..03783d3a6c9fb 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
@@ -13,7 +13,7 @@ target triple = "x86_64-apple-macosx"
 
 define void @convert() {
 entry:
-  %0 = load i32, i32* @n, align 4
+  %0 = load i32, ptr @n, align 4
   %cmp4 = icmp eq i32 %0, 0
   br i1 %cmp4, label %for.end, label %for.body.preheader
 
@@ -22,11 +22,11 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds [10000 x double], [10000 x double]* @double_array, i64 0, i64 %indvars.iv
-  %1 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds [10000 x double], ptr @double_array, i64 0, i64 %indvars.iv
+  %1 = load double, ptr %arrayidx, align 8
   %conv = fptoui double %1 to i32
-  %arrayidx2 = getelementptr inbounds [10000 x i32], [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv
-  store i32 %conv, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds [10000 x i32], ptr @unsigned_array, i64 0, i64 %indvars.iv
+  store i32 %conv, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %2 = trunc i64 %indvars.iv.next to i32
   %cmp = icmp ult i32 %2, %0

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
index 3d77758b565e5..4f7dee2b11365 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
@@ -6,16 +6,16 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 
 ; CHECK: cost of 1 for VF 1 For instruction:   %conv = fptosi float %tmp to i8
-define void @float_to_sint8_cost(i8* noalias nocapture %a, float* noalias nocapture readonly %b) nounwind {
+define void @float_to_sint8_cost(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) nounwind {
 entry:
   br label %for.body
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %tmp = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %tmp = load float, ptr %arrayidx, align 4
   %conv = fptosi float %tmp to i8
-  %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
-  store i8 %conv, i8* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
+  store i8 %conv, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 256
   br i1 %exitcond, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll
index 940cba11db3c6..023728abe8a64 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll
@@ -2,16 +2,16 @@
 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i686-pc-windows-msvc18.0.0"
 
-define void @test1() #0 personality i32 (...)* @__CxxFrameHandler3 {
+define void @test1() #0 personality ptr @__CxxFrameHandler3 {
 entry:
-  invoke void @_CxxThrowException(i8* null, i8* null)
+  invoke void @_CxxThrowException(ptr null, ptr null)
           to label %unreachable unwind label %catch.dispatch
 
 catch.dispatch:                                   ; preds = %entry
   %0 = catchswitch within none [label %catch] unwind to caller
 
 catch:                                            ; preds = %catch.dispatch
-  %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+  %1 = catchpad within %0 [ptr null, i32 64, ptr null]
   br label %for.body
 
 for.cond.cleanup:                                 ; preds = %for.body
@@ -32,10 +32,10 @@ unreachable:                                      ; preds = %entry
 }
 
 ; CHECK-LABEL: define void @test1(
-; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [i8* null, i32 64, i8* null]
+; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [ptr null, i32 64, ptr null]
 ; CHECK: call <8 x double> @llvm.floor.v8f64(<8 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
 
-declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
+declare x86_stdcallcc void @_CxxThrowException(ptr, ptr)
 
 declare i32 @__CxxFrameHandler3(...)
 

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll
index b817d58f131d0..3457d59b60b54 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll
@@ -30,32 +30,32 @@ define void @_Z4testmm(i64 %size, i64 %offset) {
 ; CHECK-NEXT:    [[B_054:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD30:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[V_055]], [[OFFSET:%.*]]
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[ADD]], 3
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 [[MUL]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [512 x float], [512 x float]* @kernel, i64 0, i64 [[V_055]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 [[MUL]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 [[V_055]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[MUL3:%.*]] = fmul fast float [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i64 0, i64 [[V_055]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 [[V_055]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[MUL5:%.*]] = fmul fast float [[MUL3]], [[TMP2]]
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i64 0, i64 [[V_055]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 [[V_055]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
 ; CHECK-NEXT:    [[MUL7:%.*]] = fmul fast float [[MUL5]], [[TMP3]]
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i64 0, i64 [[V_055]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 [[V_055]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[ARRAYIDX8]], align 4
 ; CHECK-NEXT:    [[MUL9:%.*]] = fmul fast float [[MUL7]], [[TMP4]]
 ; CHECK-NEXT:    [[ADD10]] = fadd fast float [[R_057]], [[MUL9]]
 ; CHECK-NEXT:    [[ARRAYIDX_SUM:%.*]] = add i64 [[MUL]], 1
-; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 [[ARRAYIDX_SUM]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[ARRAYIDX11]], align 4
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 [[ARRAYIDX_SUM]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
 ; CHECK-NEXT:    [[MUL13:%.*]] = fmul fast float [[TMP1]], [[TMP5]]
 ; CHECK-NEXT:    [[MUL15:%.*]] = fmul fast float [[TMP2]], [[MUL13]]
 ; CHECK-NEXT:    [[MUL17:%.*]] = fmul fast float [[TMP3]], [[MUL15]]
 ; CHECK-NEXT:    [[MUL19:%.*]] = fmul fast float [[TMP4]], [[MUL17]]
 ; CHECK-NEXT:    [[ADD20]] = fadd fast float [[G_056]], [[MUL19]]
 ; CHECK-NEXT:    [[ARRAYIDX_SUM52:%.*]] = add i64 [[MUL]], 2
-; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 [[ARRAYIDX_SUM52]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[ARRAYIDX21]], align 4
+; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 [[ARRAYIDX_SUM52]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[ARRAYIDX21]], align 4
 ; CHECK-NEXT:    [[MUL23:%.*]] = fmul fast float [[TMP1]], [[TMP6]]
 ; CHECK-NEXT:    [[MUL25:%.*]] = fmul fast float [[TMP2]], [[MUL23]]
 ; CHECK-NEXT:    [[MUL27:%.*]] = fmul fast float [[TMP3]], [[MUL25]]
@@ -76,9 +76,9 @@ define void @_Z4testmm(i64 %size, i64 %offset) {
 ; CHECK-NEXT:    [[R_0_LCSSA:%.*]] = phi i8 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[G_0_LCSSA:%.*]] = phi i8 [ [[PHITMP60]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    [[B_0_LCSSA:%.*]] = phi i8 [ [[PHITMP61]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    store i8 [[R_0_LCSSA]], i8* @r_, align 1
-; CHECK-NEXT:    store i8 [[G_0_LCSSA]], i8* @g_, align 1
-; CHECK-NEXT:    store i8 [[B_0_LCSSA]], i8* @b_, align 1
+; CHECK-NEXT:    store i8 [[R_0_LCSSA]], ptr @r_, align 1
+; CHECK-NEXT:    store i8 [[G_0_LCSSA]], ptr @g_, align 1
+; CHECK-NEXT:    store i8 [[B_0_LCSSA]], ptr @b_, align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -95,32 +95,32 @@ for.body:
   %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ]
   %add = add i64 %v.055, %offset
   %mul = mul i64 %add, 3
-  %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i64 0, i64 %v.055
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %mul
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 %v.055
+  %1 = load float, ptr %arrayidx2, align 4
   %mul3 = fmul fast float %0, %1
-  %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i64 0, i64 %v.055
-  %2 = load float, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 %v.055
+  %2 = load float, ptr %arrayidx4, align 4
   %mul5 = fmul fast float %mul3, %2
-  %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i64 0, i64 %v.055
-  %3 = load float, float* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 %v.055
+  %3 = load float, ptr %arrayidx6, align 4
   %mul7 = fmul fast float %mul5, %3
-  %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i64 0, i64 %v.055
-  %4 = load float, float* %arrayidx8, align 4
+  %arrayidx8 = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 %v.055
+  %4 = load float, ptr %arrayidx8, align 4
   %mul9 = fmul fast float %mul7, %4
   %add10 = fadd fast float %r.057, %mul9
   %arrayidx.sum = add i64 %mul, 1
-  %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum
-  %5 = load float, float* %arrayidx11, align 4
+  %arrayidx11 = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %arrayidx.sum
+  %5 = load float, ptr %arrayidx11, align 4
   %mul13 = fmul fast float %1, %5
   %mul15 = fmul fast float %2, %mul13
   %mul17 = fmul fast float %3, %mul15
   %mul19 = fmul fast float %4, %mul17
   %add20 = fadd fast float %g.056, %mul19
   %arrayidx.sum52 = add i64 %mul, 2
-  %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52
-  %6 = load float, float* %arrayidx21, align 4
+  %arrayidx21 = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %arrayidx.sum52
+  %6 = load float, ptr %arrayidx21, align 4
   %mul23 = fmul fast float %1, %6
   %mul25 = fmul fast float %2, %mul23
   %mul27 = fmul fast float %3, %mul25
@@ -143,8 +143,8 @@ for.end:
   %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
   %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ]
   %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ]
-  store i8 %r.0.lcssa, i8* @r_, align 1
-  store i8 %g.0.lcssa, i8* @g_, align 1
-  store i8 %b.0.lcssa, i8* @b_, align 1
+  store i8 %r.0.lcssa, ptr @r_, align 1
+  store i8 %g.0.lcssa, ptr @g_, align 1
+  store i8 %b.0.lcssa, ptr @b_, align 1
   ret void
 }

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll b/llvm/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll
index ab03e911ccc3b..937ff6ff10791 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll
@@ -26,11 +26,11 @@ define void @strided_load_i64() {
 
 ; <label>:1:                                      ; preds = %0, %1
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [10240 x i64], [10240 x i64]* @A, i64 0, i64 %indvars.iv
-  %3 = load i64, i64* %2, align 16
+  %2 = getelementptr inbounds [10240 x i64], ptr @A, i64 0, i64 %indvars.iv
+  %3 = load i64, ptr %2, align 16
   %4 = add i64 %3, 5
-  %5 = getelementptr inbounds [10240 x i64], [10240 x i64]* @B, i64 0, i64 %indvars.iv
-  store i64 %4, i64* %5, align 16
+  %5 = getelementptr inbounds [10240 x i64], ptr @B, i64 0, i64 %indvars.iv
+  store i64 %4, ptr %5, align 16
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 8
   %6 = icmp slt i64 %indvars.iv.next, 1024
   br i1 %6, label %1, label %7

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll
index a58fd66a93205..07a98da5ab1c0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -28,13 +28,13 @@ define void @example1() nounwind uwtable ssp {
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %5, %3
-  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %6, i32* %7, align 4
+  %7 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %6, ptr %7, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 256
@@ -56,16 +56,16 @@ define void @example1() nounwind uwtable ssp {
 ;UNROLL: store <4 x i32>
 ;UNROLL: store <4 x i32>
 ;UNROLL: ret void
-define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+define void @example10b(ptr noalias nocapture %sa, ptr noalias nocapture %sb, ptr noalias nocapture %sc, ptr noalias nocapture %ia, ptr noalias nocapture %ib, ptr noalias nocapture %ic) nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds i16, i16* %sb, i64 %indvars.iv
-  %3 = load i16, i16* %2, align 2
+  %2 = getelementptr inbounds i16, ptr %sb, i64 %indvars.iv
+  %3 = load i16, ptr %2, align 2
   %4 = sext i16 %3 to i32
-  %5 = getelementptr inbounds i32, i32* %ia, i64 %indvars.iv
-  store i32 %4, i32* %5, align 4
+  %5 = getelementptr inbounds i32, ptr %ia, i64 %indvars.iv
+  store i32 %4, ptr %5, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/int128_no_gather.ll b/llvm/test/Transforms/LoopVectorize/X86/int128_no_gather.ll
index e9f92c9c7a246..44e9d3e426f45 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/int128_no_gather.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/int128_no_gather.ll
@@ -24,42 +24,42 @@ do.body:                                          ; preds = %do.body, %entry
   %i.0 = phi i128 [ 1, %entry ], [ %add11, %do.body ]
   %and = and i128 %j.0, 32767
   %idxprom = trunc i128 %i.0 to i64
-  %arrayidx = getelementptr inbounds [151 x i128], [151 x i128]* @x, i64 0, i64 %idxprom
-  store i128 %and, i128* %arrayidx, align 16
+  %arrayidx = getelementptr inbounds [151 x i128], ptr @x, i64 0, i64 %idxprom
+  store i128 %and, ptr %arrayidx, align 16
   %add = add nuw nsw i128 %j.0, 11111
   %and1 = and i128 %add, 32767
   %add2 = add nuw nsw i128 %i.0, 1
   %idxprom3 = trunc i128 %add2 to i64
-  %arrayidx4 = getelementptr inbounds [151 x i128], [151 x i128]* @x, i64 0, i64 %idxprom3
-  store i128 %and1, i128* %arrayidx4, align 16
+  %arrayidx4 = getelementptr inbounds [151 x i128], ptr @x, i64 0, i64 %idxprom3
+  store i128 %and1, ptr %arrayidx4, align 16
   %add5 = add nuw nsw i128 %j.0, 22222
   %and6 = and i128 %add5, 32767
   %add7 = add nuw nsw i128 %i.0, 2
   %idxprom8 = trunc i128 %add7 to i64
-  %arrayidx9 = getelementptr inbounds [151 x i128], [151 x i128]* @x, i64 0, i64 %idxprom8
-  store i128 %and6, i128* %arrayidx9, align 16
+  %arrayidx9 = getelementptr inbounds [151 x i128], ptr @x, i64 0, i64 %idxprom8
+  store i128 %and6, ptr %arrayidx9, align 16
   %add10 = add nuw nsw i128 %j.0, 33333
   %add11 = add nuw nsw i128 %i.0, 3
   %cmp = icmp slt i128 %add11, 149
   br i1 %cmp, label %do.body, label %do.end
 
 do.end:                                           ; preds = %do.body
-  store i128 1766649, i128* getelementptr inbounds ([151 x i128], [151 x i128]* @x, i64 0, i64 149), align 16
-  store i128 1766649, i128* getelementptr inbounds ([151 x i128], [151 x i128]* @x, i64 0, i64 150), align 16
+  store i128 1766649, ptr getelementptr inbounds ([151 x i128], ptr @x, i64 0, i64 149), align 16
+  store i128 1766649, ptr getelementptr inbounds ([151 x i128], ptr @x, i64 0, i64 150), align 16
   %call = tail call i32 @y3inner()
-  %0 = load i128, i128* getelementptr inbounds ([151 x i128], [151 x i128]* @x, i64 0, i64 25), align 16
+  %0 = load i128, ptr getelementptr inbounds ([151 x i128], ptr @x, i64 0, i64 25), align 16
   %cmp12 = icmp eq i128 %0, 5085
   br i1 %cmp12, label %if.then, label %if.else
 
 if.then:                                          ; preds = %do.end
-  %puts = tail call i32 @puts(i8* getelementptr inbounds ([45 x i8], [45 x i8]* @str, i64 0, i64 0))
+  %puts = tail call i32 @puts(ptr @str)
   br label %if.end
 
 if.else:                                          ; preds = %do.end
   %coerce.sroa.0.0.extract.trunc = trunc i128 %0 to i64
   %coerce.sroa.2.0.extract.shift = lshr i128 %0, 64
   %coerce.sroa.2.0.extract.trunc = trunc i128 %coerce.sroa.2.0.extract.shift to i64
-  %call14 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([44 x i8], [44 x i8]* @.str.1, i64 0, i64 0), i64 %coerce.sroa.0.0.extract.trunc, i64 %coerce.sroa.2.0.extract.trunc)
+  %call14 = tail call i32 (ptr, ...) @printf(ptr @.str.1, i64 %coerce.sroa.0.0.extract.trunc, i64 %coerce.sroa.2.0.extract.trunc)
   br label %if.end
 
 if.end:                                           ; preds = %if.else, %if.then
@@ -67,9 +67,9 @@ if.end:                                           ; preds = %if.else, %if.then
 }
 
 ; Function Attrs: nounwind
-declare i32 @printf(i8*, ...) #1
+declare i32 @printf(ptr, ...) #1
 ; Function Attrs: nounwind
-declare i32 @puts(i8* nocapture readonly) #2
+declare i32 @puts(ptr nocapture readonly) #2
 
 attributes #0 = { noinline nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+mpx,+pclmul,+pku,+popcnt,+rdrnd,+rdseed,+rtm,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+mpx,+pclmul,+pku,+popcnt,+rdrnd,+rdseed,+rtm,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/interleave_short_tc.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave_short_tc.ll
index b1c0696bb5162..d3604b51a82f9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleave_short_tc.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleave_short_tc.ll
@@ -43,14 +43,14 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 
 for.body:                                         ; preds = %for.body, %for.body.preheader
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [5 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %1 = trunc i64 %indvars.iv to i32
   %mul = mul nsw i32 %0, %1
-  %arrayidx2 = getelementptr inbounds [5 x i32], [5 x i32]* @a, i64 0, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds [5 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %2, %mul
-  store i32 %add, i32* %arrayidx2, align 4
+  store i32 %add, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !prof !1

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-large-gap.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-large-gap.ll
index b4dc283512e43..e75d469506376 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-large-gap.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-large-gap.ll
@@ -6,7 +6,7 @@
 ; interleaving.
 
 ; Test from https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=7560
-define void @test1(%struct.ST4* noalias %B) {
+define void @test1(ptr noalias %B) {
 ; CHECK-LABEL: @test1
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label %for.body
@@ -23,14 +23,14 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %p1 = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0
-  store i32 65536, i32* %p1, align 4
-  %p2 = getelementptr i32, i32* %p1, i32 -2147483648
-  store i32 65536, i32* %p2, align 4
-  %p3 = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2
-  store i32 10, i32* %p3, align 4
-  %p4 = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3
-  store i32 12, i32* %p4, align 4
+  %p1 = getelementptr inbounds %struct.ST4, ptr %B, i64 %indvars.iv, i32 0
+  store i32 65536, ptr %p1, align 4
+  %p2 = getelementptr i32, ptr %p1, i32 -2147483648
+  store i32 65536, ptr %p2, align 4
+  %p3 = getelementptr inbounds %struct.ST4, ptr %B, i64 %indvars.iv, i32 2
+  store i32 10, ptr %p3, align 4
+  %p4 = getelementptr inbounds %struct.ST4, ptr %B, i64 %indvars.iv, i32 3
+  store i32 12, ptr %p4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1024
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -57,11 +57,11 @@ entry:
 for.body:
   %iv = phi i64 [ 1, %entry ], [ %iv.next, %for.body ]
   %iv.next = add nsw i64 %iv, 1
-  %arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* undef, i64 0, i64 %iv.next
-  %G2 = getelementptr i32, i32* %arrayidx, i64 %iv.next
-  %G9 = getelementptr i32, i32* %G2, i32 -2147483647
-  store i32 0, i32* %G2
-  store i32 1, i32* %G9
+  %arrayidx = getelementptr inbounds [3 x i32], ptr undef, i64 0, i64 %iv.next
+  %G2 = getelementptr i32, ptr %arrayidx, i64 %iv.next
+  %G9 = getelementptr i32, ptr %G2, i32 -2147483647
+  store i32 0, ptr %G2
+  store i32 1, ptr %G9
   %cmp = icmp ule i64 %iv, 1000
   br i1 false, label %for.body, label %exit
 
@@ -87,11 +87,11 @@ entry:
 for.body:
   %iv = phi i64 [ 1, %entry ], [ %iv.next, %for.body ]
   %iv.next = add nsw i64 %iv, 1
-  %arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* undef, i64 0, i64 %iv.next
-  %G2 = getelementptr i32, i32* %arrayidx, i64 %iv.next
-  %G9 = getelementptr i32, i32* %G2, i32 -2147483648
-  store i32 0, i32* %G2
-  store i32 1, i32* %G9
+  %arrayidx = getelementptr inbounds [3 x i32], ptr undef, i64 0, i64 %iv.next
+  %G2 = getelementptr i32, ptr %arrayidx, i64 %iv.next
+  %G9 = getelementptr i32, ptr %G2, i32 -2147483648
+  store i32 0, ptr %G2
+  store i32 1, ptr %G9
   %cmp = icmp ule i64 %iv, 1000
   br i1 false, label %for.body, label %exit
 

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-waw-dependency.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-waw-dependency.ll
index 3d4de065e34e7..8748e43e17b02 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-waw-dependency.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-waw-dependency.ll
@@ -14,7 +14,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; store 0 and store (x + 1) to the same memory.
 ; For now, we identify such unsafe dependency and disable adding the
 ; store into the interleaved group.
-; In this test case, because we disable adding store into i32* %storeaddr12 and
+; In this test case, because we disable adding store into ptr %storeaddr12 and
 ; storeaddr22, we create interleaved groups with gaps and
 ; disable that interleaved group. So, we are only left with valid interleaved
 ; groups.
@@ -23,81 +23,78 @@ target triple = "x86_64-unknown-linux-gnu"
 
 
 ; CHECK:      LV: Analyzing interleaved accesses...
-; CHECK:      LV: Creating an interleave group with:  store i32 %tmp34, i32* %storeaddr32, align 4
-; CHECK-NEXT: LV: Inserted:  store i32 %tmp24, i32* %storeaddr22, align 4
-; CHECK-NEXT:     into the interleave group with  store i32 %tmp34, i32* %storeaddr32, align 4
-; CHECK-NEXT: LV: Inserted:  store i32 %tmp14, i32* %storeaddr12, align 4
-; CHECK-NEXT:     into the interleave group with  store i32 %tmp34, i32* %storeaddr32, align 4
-; CHECK:      LV: Invalidated store group due to dependence between   store i32 %tmp24, i32* %storeaddr22, align 4 and   store i32 0, i32* %storeaddr22, align 4
-; CHECK-NEXT: LV: Creating an interleave group with:  store i32 %tmp24, i32* %storeaddr22, align 4
-; CHECK-NEXT: LV: Inserted:  store i32 %tmp14, i32* %storeaddr12, align 4
-; CHECK-NEXT:     into the interleave group with  store i32 %tmp24, i32* %storeaddr22, align 4
-; CHECK-NEXT: LV: Invalidated store group due to dependence between   store i32 %tmp14, i32* %storeaddr12, align 4 and   store i32 0, i32* %storeaddr12, align 4
-
-
-define void @test(i8* nonnull align 8 dereferenceable_or_null(24) %arg) {
+; CHECK:      LV: Creating an interleave group with:  store i32 %tmp34, ptr %storeaddr32, align 4
+; CHECK-NEXT: LV: Inserted:  store i32 %tmp24, ptr %storeaddr22, align 4
+; CHECK-NEXT:     into the interleave group with  store i32 %tmp34, ptr %storeaddr32, align 4
+; CHECK-NEXT: LV: Inserted:  store i32 %tmp14, ptr %storeaddr12, align 4
+; CHECK-NEXT:     into the interleave group with  store i32 %tmp34, ptr %storeaddr32, align 4
+; CHECK:      LV: Invalidated store group due to dependence between   store i32 %tmp24, ptr %storeaddr22, align 4 and   store i32 0, ptr %storeaddr22, align 4
+; CHECK-NEXT: LV: Creating an interleave group with:  store i32 %tmp24, ptr %storeaddr22, align 4
+; CHECK-NEXT: LV: Inserted:  store i32 %tmp14, ptr %storeaddr12, align 4
+; CHECK-NEXT:     into the interleave group with  store i32 %tmp24, ptr %storeaddr22, align 4
+; CHECK-NEXT: LV: Invalidated store group due to dependence between   store i32 %tmp14, ptr %storeaddr12, align 4 and   store i32 0, ptr %storeaddr12, align 4
+
+
+define void @test(ptr nonnull align 8 dereferenceable_or_null(24) %arg) {
 bb:
-  %tmp = getelementptr inbounds i8, i8* %arg, i64 16
-  %tmp1 = bitcast i8* %tmp to i8**
-  %tmp2 = load i8*, i8** %tmp1, align 8
-  %tmp3 = getelementptr inbounds i8, i8* %arg, i64 8
-  %tmp4 = bitcast i8* %tmp3 to i8**
-  %tmp5 = load i8*, i8** %tmp4, align 8
-  %tmp6 = getelementptr inbounds i8, i8* %tmp5, i64 12
-  %tmp7 = bitcast i8* %tmp6 to i32*
-  %tmp8 = getelementptr inbounds i8, i8* %tmp2, i64 12
+  %tmp = getelementptr inbounds i8, ptr %arg, i64 16
+  %tmp2 = load ptr, ptr %tmp, align 8
+  %tmp3 = getelementptr inbounds i8, ptr %arg, i64 8
+  %tmp5 = load ptr, ptr %tmp3, align 8
+  %tmp6 = getelementptr inbounds i8, ptr %tmp5, i64 12
+  %tmp8 = getelementptr inbounds i8, ptr %tmp2, i64 12
   br label %header
 
 header:                                              ; preds = %latch, %bb
   %tmp10 = phi i64 [ %tmp41, %latch ], [ 3, %bb ]
   %tmp11 = add nsw i64 %tmp10, -1
-  %storeaddr12 = getelementptr inbounds i32, i32* %tmp7, i64 %tmp11
-  %tmp13 = load i32, i32* %storeaddr12, align 4
+  %storeaddr12 = getelementptr inbounds i32, ptr %tmp6, i64 %tmp11
+  %tmp13 = load i32, ptr %storeaddr12, align 4
   %tmp14 = add i32 %tmp13, 1
-  store i32 %tmp14, i32* %storeaddr12, align 4
+  store i32 %tmp14, ptr %storeaddr12, align 4
   %tmp15 = icmp slt i32 %tmp13, 1
   %tmp16 = xor i1 %tmp15, true
   %tmp17 = zext i1 %tmp16 to i8
-  %tmp18 = getelementptr inbounds i8, i8* %tmp8, i64 %tmp10
-  store i8 %tmp17, i8* %tmp18, align 1
+  %tmp18 = getelementptr inbounds i8, ptr %tmp8, i64 %tmp10
+  store i8 %tmp17, ptr %tmp18, align 1
   br i1 %tmp15, label %bb19, label %bb20
 
 bb19:                                             ; preds = %header
-  store i32 0, i32* %storeaddr12, align 4
+  store i32 0, ptr %storeaddr12, align 4
   br label %bb20
 
 bb20:                                             ; preds = %bb19, %header
   %tmp21 = add nuw nsw i64 %tmp10, 1
-  %storeaddr22 = getelementptr inbounds i32, i32* %tmp7, i64 %tmp10
-  %tmp23 = load i32, i32* %storeaddr22, align 4
+  %storeaddr22 = getelementptr inbounds i32, ptr %tmp6, i64 %tmp10
+  %tmp23 = load i32, ptr %storeaddr22, align 4
   %tmp24 = add i32 %tmp23, 1
-  store i32 %tmp24, i32* %storeaddr22, align 4
+  store i32 %tmp24, ptr %storeaddr22, align 4
   %tmp25 = icmp slt i32 %tmp23, 1
   %tmp26 = xor i1 %tmp25, true
   %tmp27 = zext i1 %tmp26 to i8
-  %tmp28 = getelementptr inbounds i8, i8* %tmp8, i64 %tmp21
-  store i8 %tmp27, i8* %tmp28, align 1
+  %tmp28 = getelementptr inbounds i8, ptr %tmp8, i64 %tmp21
+  store i8 %tmp27, ptr %tmp28, align 1
   br i1 %tmp25, label %bb29, label %bb30
 
 bb29:                                             ; preds = %bb20
-  store i32 0, i32* %storeaddr22, align 4
+  store i32 0, ptr %storeaddr22, align 4
   br label %bb30
 
 bb30:                                             ; preds = %bb29, %bb20
   %tmp31 = add nuw nsw i64 %tmp10, 2
-  %storeaddr32 = getelementptr inbounds i32, i32* %tmp7, i64 %tmp21
-  %tmp33 = load i32, i32* %storeaddr32, align 4
+  %storeaddr32 = getelementptr inbounds i32, ptr %tmp6, i64 %tmp21
+  %tmp33 = load i32, ptr %storeaddr32, align 4
   %tmp34 = add i32 %tmp33, 1
-  store i32 %tmp34, i32* %storeaddr32, align 4
+  store i32 %tmp34, ptr %storeaddr32, align 4
   %tmp35 = icmp slt i32 %tmp33, 1
   %tmp36 = xor i1 %tmp35, true
   %tmp37 = zext i1 %tmp36 to i8
-  %tmp38 = getelementptr inbounds i8, i8* %tmp8, i64 %tmp31
-  store i8 %tmp37, i8* %tmp38, align 1
+  %tmp38 = getelementptr inbounds i8, ptr %tmp8, i64 %tmp31
+  store i8 %tmp37, ptr %tmp38, align 1
   br i1 %tmp35, label %bb39, label %latch
 
 bb39:                                             ; preds = %bb30
-  store i32 0, i32* %storeaddr32, align 4
+  store i32 0, ptr %storeaddr32, align 4
   br label %latch
 
 latch:                                             ; preds = %bb39, %bb30
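
(Illustrative sketch, not part of the patch above: the comment in
interleaved-accesses-waw-dependency.ll describes a write-after-write pattern in
which x+1 is stored unconditionally and 0 is then conditionally stored to the
same address. A minimal opaque-pointer IR reduction of just that WAW pattern,
using hypothetical function and value names, might look like the following;
since the two stores must stay ordered, such a store cannot simply be added to
an interleaved store group.)

; Minimal sketch of the WAW pattern: an unconditional store of x+1 followed by
; a conditional store of 0 to the same address within one loop iteration.
define void @waw_sketch(ptr %a, i64 %n) {
entry:
  br label %loop

loop:                                             ; preds = %latch, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %addr = getelementptr inbounds i32, ptr %a, i64 %iv
  %x = load i32, ptr %addr, align 4
  %x.plus.1 = add i32 %x, 1
  store i32 %x.plus.1, ptr %addr, align 4         ; first store: x + 1
  %is.small = icmp slt i32 %x, 1
  br i1 %is.small, label %clear, label %latch

clear:                                            ; preds = %loop
  store i32 0, ptr %addr, align 4                 ; second store: 0, same address (WAW)
  br label %latch

latch:                                            ; preds = %clear, %loop
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:                                             ; preds = %latch
  ret void
}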

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll
index 53f85b37ca22e..e8b938600526a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll
@@ -5,7 +5,7 @@
 ; RUN: opt -S -mtriple=x86_64-pc_linux -passes=loop-vectorize,instcombine -mcpu=slm < %s | FileCheck %s --check-prefix=SSE
 ; RUN: opt -S -mtriple=x86_64-pc_linux -passes=loop-vectorize,instcombine -mcpu=atom < %s | FileCheck %s --check-prefix=ATOM
 
-define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) {
+define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
 ; SSE-LABEL: @foo(
 ; SSE-NEXT:  entry:
 ; SSE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -16,24 +16,20 @@ define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b)
 ; SSE-NEXT:    [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
 ; SSE-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX]], 1
 ; SSE-NEXT:    [[TMP2:%.*]] = or i64 [[TMP1]], 8
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
-; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
-; SSE-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP3]] to <8 x i32>*
-; SSE-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP4]] to <8 x i32>*
-; SSE-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP5]], align 4
-; SSE-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP6]], align 4
+; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; SSE-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4
+; SSE-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4
 ; SSE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; SSE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; SSE-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 ; SSE-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 ; SSE-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]]
 ; SSE-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC2]]
-; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; SSE-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
-; SSE-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], align 4
-; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i64 4
-; SSE-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
-; SSE-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP12]], align 4
+; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; SSE-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
+; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 4
+; SSE-NEXT:    store <4 x i32> [[TMP8]], ptr [[TMP11]], align 4
 ; SSE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; SSE-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; SSE-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -60,18 +56,14 @@ define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b)
 ; AVX1-NEXT:    [[TMP4:%.*]] = or i64 [[TMP3]], 16
 ; AVX1-NEXT:    [[TMP5:%.*]] = shl i64 [[INDEX]], 1
 ; AVX1-NEXT:    [[TMP6:%.*]] = or i64 [[TMP5]], 24
-; AVX1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
-; AVX1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
-; AVX1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
-; AVX1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP6]]
-; AVX1-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP7]] to <8 x i32>*
-; AVX1-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>*
-; AVX1-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
-; AVX1-NEXT:    [[TMP14:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>*
-; AVX1-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4
-; AVX1-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP12]], align 4
-; AVX1-NEXT:    [[WIDE_VEC2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4
-; AVX1-NEXT:    [[WIDE_VEC3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP14]], align 4
+; AVX1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; AVX1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; AVX1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP4]]
+; AVX1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP6]]
+; AVX1-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4
+; AVX1-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4
+; AVX1-NEXT:    [[WIDE_VEC2:%.*]] = load <8 x i32>, ptr [[TMP9]], align 4
+; AVX1-NEXT:    [[WIDE_VEC3:%.*]] = load <8 x i32>, ptr [[TMP10]], align 4
 ; AVX1-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; AVX1-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; AVX1-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -84,18 +76,14 @@ define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b)
 ; AVX1-NEXT:    [[TMP16:%.*]] = add nsw <4 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
 ; AVX1-NEXT:    [[TMP17:%.*]] = add nsw <4 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
 ; AVX1-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
-; AVX1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; AVX1-NEXT:    [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <4 x i32>*
-; AVX1-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* [[TMP20]], align 4
-; AVX1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i64 4
-; AVX1-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
-; AVX1-NEXT:    store <4 x i32> [[TMP16]], <4 x i32>* [[TMP22]], align 4
-; AVX1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i64 8
-; AVX1-NEXT:    [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
-; AVX1-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* [[TMP24]], align 4
-; AVX1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i64 12
-; AVX1-NEXT:    [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <4 x i32>*
-; AVX1-NEXT:    store <4 x i32> [[TMP18]], <4 x i32>* [[TMP26]], align 4
+; AVX1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; AVX1-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP19]], align 4
+; AVX1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 4
+; AVX1-NEXT:    store <4 x i32> [[TMP16]], ptr [[TMP21]], align 4
+; AVX1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 8
+; AVX1-NEXT:    store <4 x i32> [[TMP17]], ptr [[TMP23]], align 4
+; AVX1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 12
+; AVX1-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP25]], align 4
 ; AVX1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; AVX1-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; AVX1-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -122,18 +110,14 @@ define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b)
 ; AVX2-NEXT:    [[TMP4:%.*]] = or i64 [[TMP3]], 32
 ; AVX2-NEXT:    [[TMP5:%.*]] = shl i64 [[INDEX]], 1
 ; AVX2-NEXT:    [[TMP6:%.*]] = or i64 [[TMP5]], 48
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
-; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
-; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
-; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP6]]
-; AVX2-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP7]] to <16 x i32>*
-; AVX2-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP8]] to <16 x i32>*
-; AVX2-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP9]] to <16 x i32>*
-; AVX2-NEXT:    [[TMP14:%.*]] = bitcast i32* [[TMP10]] to <16 x i32>*
-; AVX2-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[TMP11]], align 4
-; AVX2-NEXT:    [[WIDE_VEC1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP12]], align 4
-; AVX2-NEXT:    [[WIDE_VEC2:%.*]] = load <16 x i32>, <16 x i32>* [[TMP13]], align 4
-; AVX2-NEXT:    [[WIDE_VEC3:%.*]] = load <16 x i32>, <16 x i32>* [[TMP14]], align 4
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP4]]
+; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP6]]
+; AVX2-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4
+; AVX2-NEXT:    [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP8]], align 4
+; AVX2-NEXT:    [[WIDE_VEC2:%.*]] = load <16 x i32>, ptr [[TMP9]], align 4
+; AVX2-NEXT:    [[WIDE_VEC3:%.*]] = load <16 x i32>, ptr [[TMP10]], align 4
 ; AVX2-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; AVX2-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; AVX2-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -146,18 +130,14 @@ define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b)
 ; AVX2-NEXT:    [[TMP16:%.*]] = add nsw <8 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
 ; AVX2-NEXT:    [[TMP17:%.*]] = add nsw <8 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
 ; AVX2-NEXT:    [[TMP18:%.*]] = add nsw <8 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
-; AVX2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; AVX2-NEXT:    [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <8 x i32>*
-; AVX2-NEXT:    store <8 x i32> [[TMP15]], <8 x i32>* [[TMP20]], align 4
-; AVX2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i64 8
-; AVX2-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <8 x i32>*
-; AVX2-NEXT:    store <8 x i32> [[TMP16]], <8 x i32>* [[TMP22]], align 4
-; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i64 16
-; AVX2-NEXT:    [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <8 x i32>*
-; AVX2-NEXT:    store <8 x i32> [[TMP17]], <8 x i32>* [[TMP24]], align 4
-; AVX2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i64 24
-; AVX2-NEXT:    [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <8 x i32>*
-; AVX2-NEXT:    store <8 x i32> [[TMP18]], <8 x i32>* [[TMP26]], align 4
+; AVX2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; AVX2-NEXT:    store <8 x i32> [[TMP15]], ptr [[TMP19]], align 4
+; AVX2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 8
+; AVX2-NEXT:    store <8 x i32> [[TMP16]], ptr [[TMP21]], align 4
+; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 16
+; AVX2-NEXT:    store <8 x i32> [[TMP17]], ptr [[TMP23]], align 4
+; AVX2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 24
+; AVX2-NEXT:    store <8 x i32> [[TMP18]], ptr [[TMP25]], align 4
 ; AVX2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; AVX2-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; AVX2-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -178,14 +158,14 @@ define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b)
 ; ATOM:       for.body:
 ; ATOM-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; ATOM-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1
-; ATOM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
-; ATOM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; ATOM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; ATOM-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; ATOM-NEXT:    [[TMP2:%.*]] = or i64 [[TMP0]], 1
-; ATOM-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
-; ATOM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; ATOM-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; ATOM-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
 ; ATOM-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], [[TMP1]]
-; ATOM-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
-; ATOM-NEXT:    store i32 [[ADD4]], i32* [[ARRAYIDX6]], align 4
+; ATOM-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; ATOM-NEXT:    store i32 [[ADD4]], ptr [[ARRAYIDX6]], align 4
 ; ATOM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; ATOM-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
 ; ATOM-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -199,14 +179,14 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = shl nsw i64 %indvars.iv, 1
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %0
-  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %0
+  %1 = load i32, ptr %arrayidx, align 4
   %2 = or i64 %0, 1
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %2
-  %3 = load i32, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 %2
+  %3 = load i32, ptr %arrayidx3, align 4
   %add4 = add nsw i32 %3, %1
-  %arrayidx6 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 %add4, i32* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %add4, ptr %arrayidx6, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1024
   br i1 %exitcond, label %for.cond.cleanup, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
index 5fdafb4ea6223..038852f55f455 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define void @sin_f64(double* nocapture %varray) {
+define void @sin_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f64(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_sin(<2 x double> [[TMP4:%.*]])
@@ -16,8 +16,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @sin(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
@@ -31,7 +31,7 @@ for.end:
 !3 = !{!"llvm.loop.vectorize.enable", i1 true}
 
 
-define void @sin_f32(float* nocapture %varray) {
+define void @sin_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_sinf(<8 x float> [[TMP4:%.*]])
@@ -44,8 +44,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @sinf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
@@ -58,7 +58,7 @@ for.end:
 !22 = !{!"llvm.loop.vectorize.width", i32 8}
 !23 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @sin_f64_intrinsic(double* nocapture %varray) {
+define void @sin_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f64_intrinsic(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_sin(<2 x double> [[TMP4:%.*]])
@@ -71,8 +71,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.sin.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
@@ -85,7 +85,7 @@ for.end:
 !32 = !{!"llvm.loop.vectorize.width", i32 2}
 !33 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @sin_f32_intrinsic(float* nocapture %varray) {
+define void @sin_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32_intrinsic(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_sinf(<8 x float> [[TMP4:%.*]])
@@ -98,8 +98,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.sin.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41
@@ -112,7 +112,7 @@ for.end:
 !42 = !{!"llvm.loop.vectorize.width", i32 8}
 !43 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @cos_f64(double* nocapture %varray) {
+define void @cos_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f64(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_cos(<2 x double> [[TMP4:%.*]])
@@ -125,8 +125,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @cos(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51
@@ -139,7 +139,7 @@ for.end:
 !52 = !{!"llvm.loop.vectorize.width", i32 2}
 !53 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @cos_f32(float* nocapture %varray) {
+define void @cos_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_cosf(<8 x float> [[TMP4:%.*]])
@@ -152,8 +152,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @cosf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !61
@@ -166,7 +166,7 @@ for.end:
 !62 = !{!"llvm.loop.vectorize.width", i32 8}
 !63 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @cos_f64_intrinsic(double* nocapture %varray) {
+define void @cos_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f64_intrinsic(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_cos(<2 x double> [[TMP4:%.*]])
@@ -179,8 +179,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.cos.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71
@@ -193,7 +193,7 @@ for.end:
 !72 = !{!"llvm.loop.vectorize.width", i32 2}
 !73 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @cos_f32_intrinsic(float* nocapture %varray) {
+define void @cos_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32_intrinsic(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_cosf(<8 x float> [[TMP4:%.*]])
@@ -206,8 +206,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.cos.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !81
@@ -221,7 +221,7 @@ for.end:
 !83 = !{!"llvm.loop.vectorize.enable", i1 true}
 
 
-define void @exp_f32(float* nocapture %varray) {
+define void @exp_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32
 ; CHECK-LABEL:    vector.body
 ; CHECK: <8 x float> @_ZGVdN8v_expf
@@ -233,8 +233,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @expf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91
@@ -247,7 +247,7 @@ for.end:                                          ; preds = %for.body
 !92 = !{!"llvm.loop.vectorize.width", i32 8}
 !93 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @exp_f32_intrin(float* nocapture %varray) {
+define void @exp_f32_intrin(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32_intrin
 ; CHECK-LABEL: vector.body
 ; CHECK: <8 x float> @_ZGVdN8v_expf
@@ -259,8 +259,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @llvm.exp.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !101
@@ -274,7 +274,7 @@ for.end:                                          ; preds = %for.body
 !103 = !{!"llvm.loop.vectorize.enable", i1 true}
 
 
-define void @log_f32(float* nocapture %varray) {
+define void @log_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32
 ; CHECK-LABEL: vector.body
 ; CHECK: <8 x float> @_ZGVdN8v_logf
@@ -286,8 +286,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @logf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !111
@@ -300,7 +300,7 @@ for.end:                                          ; preds = %for.body
 !112 = !{!"llvm.loop.vectorize.width", i32 8}
 !113 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
+define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32
 ; CHECK-LABEL:    vector.body
 ; CHECK: <8 x float> @_ZGVdN8vv_powf
@@ -311,11 +311,11 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
-  %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv
-  %tmp1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %indvars.iv
+  %tmp1 = load float, ptr %arrayidx, align 4
   %tmp2 = tail call fast float @powf(float %conv, float %tmp1)
-  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %tmp2, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %tmp2, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !121
@@ -328,7 +328,7 @@ for.end:                                          ; preds = %for.body
 !122 = !{!"llvm.loop.vectorize.width", i32 8}
 !123 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @pow_f32_intrin(float* nocapture %varray, float* nocapture readonly %exp) {
+define void @pow_f32_intrin(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32_intrin
 ; CHECK-LABEL:    vector.body
 ; CHECK: <8 x float> @_ZGVdN8vv_powf
@@ -339,11 +339,11 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
-  %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv
-  %tmp1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %indvars.iv
+  %tmp1 = load float, ptr %arrayidx, align 4
   %tmp2 = tail call fast float @llvm.pow.f32(float %conv, float %tmp1)
-  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %tmp2, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %tmp2, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !131

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll
index b4023fa81add7..d0d0d78a0d27e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define void @exp_f32(float* nocapture %varray) {
+define void @exp_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32
 ; CHECK-LABEL:    vector.body
 ; CHECK: <4 x float> @_ZGVbN4v___expf_finite
@@ -15,8 +15,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @__expf_finite(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
@@ -29,7 +29,7 @@ for.end:                                          ; preds = %for.body
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
 !3 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @exp_f64(double* nocapture %varray) {
+define void @exp_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f64
 ; CHECK-LABEL:    vector.body
 ; CHECK: <4 x double> @_ZGVdN4v___exp_finite
@@ -42,8 +42,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call fast double @__exp_finite(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
+  store double %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !11
@@ -56,7 +56,7 @@ for.end:                                          ; preds = %for.body
 !12 = !{!"llvm.loop.vectorize.width", i32 4}
 !13 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @log_f32(float* nocapture %varray) {
+define void @log_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32
 ; CHECK-LABEL:    vector.body
 ; CHECK: <4 x float> @_ZGVbN4v___logf_finite
@@ -69,8 +69,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @__logf_finite(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
@@ -83,7 +83,7 @@ for.end:                                          ; preds = %for.body
 !22 = !{!"llvm.loop.vectorize.width", i32 4}
 !23 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @log_f64(double* nocapture %varray) {
+define void @log_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f64
 ; CHECK-LABEL:    vector.body
 ; CHECK: <4 x double> @_ZGVdN4v___log_finite
@@ -96,8 +96,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call fast double @__log_finite(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
+  store double %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
@@ -110,7 +110,7 @@ for.end:                                          ; preds = %for.body
 !32 = !{!"llvm.loop.vectorize.width", i32 4}
 !33 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
+define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32
 ; CHECK-LABEL:    vector.body
 ; CHECK: <4 x float> @_ZGVbN4vv___powf_finite
@@ -122,11 +122,11 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
-  %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv
-  %tmp1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %indvars.iv
+  %tmp1 = load float, ptr %arrayidx, align 4
   %tmp2 = tail call fast float @__powf_finite(float %conv, float %tmp1)
-  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %tmp2, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %tmp2, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41
@@ -139,7 +139,7 @@ for.end:                                          ; preds = %for.body
 !42 = !{!"llvm.loop.vectorize.width", i32 4}
 !43 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
+define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f64
 ; CHECK-LABEL:    vector.body
 ; CHECK: <4 x double> @_ZGVdN4vv___pow_finite
@@ -151,11 +151,11 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to double
-  %arrayidx = getelementptr inbounds double, double* %exp, i64 %indvars.iv
-  %tmp1 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %indvars.iv
+  %tmp1 = load double, ptr %arrayidx, align 4
   %tmp2 = tail call fast double @__pow_finite(double %conv, double %tmp1)
-  %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %indvars.iv
-  store double %tmp2, double* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
+  store double %tmp2, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
index f982c6e820f79..7a0e44c9e9916 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define void @sin_f64(double* nocapture %varray) {
+define void @sin_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f64(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]])
@@ -16,8 +16,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @sin(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
@@ -31,7 +31,7 @@ for.end:
 !3 = !{!"llvm.loop.vectorize.enable", i1 true}
 
 
-define void @sin_f32(float* nocapture %varray) {
+define void @sin_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP4:%.*]])
@@ -44,8 +44,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @sinf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
@@ -58,7 +58,7 @@ for.end:
 !22 = !{!"llvm.loop.vectorize.width", i32 4}
 !23 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @sin_f64_intrinsic(double* nocapture %varray) {
+define void @sin_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f64_intrinsic(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]])
@@ -71,8 +71,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.sin.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
@@ -85,7 +85,7 @@ for.end:
 !32 = !{!"llvm.loop.vectorize.width", i32 4}
 !33 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @sin_f32_intrinsic(float* nocapture %varray) {
+define void @sin_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32_intrinsic(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP4:%.*]])
@@ -98,8 +98,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.sin.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41
@@ -112,7 +112,7 @@ for.end:
 !42 = !{!"llvm.loop.vectorize.width", i32 4}
 !43 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @cos_f64(double* nocapture %varray) {
+define void @cos_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f64(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]])
@@ -125,8 +125,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @cos(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51
@@ -139,7 +139,7 @@ for.end:
 !52 = !{!"llvm.loop.vectorize.width", i32 4}
 !53 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @cos_f32(float* nocapture %varray) {
+define void @cos_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP4:%.*]])
@@ -152,8 +152,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @cosf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !61
@@ -166,7 +166,7 @@ for.end:
 !62 = !{!"llvm.loop.vectorize.width", i32 4}
 !63 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @cos_f64_intrinsic(double* nocapture %varray) {
+define void @cos_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f64_intrinsic(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]])
@@ -179,8 +179,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.cos.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71
@@ -193,7 +193,7 @@ for.end:
 !72 = !{!"llvm.loop.vectorize.width", i32 4}
 !73 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @cos_f32_intrinsic(float* nocapture %varray) {
+define void @cos_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32_intrinsic(
 ; CHECK-LABEL:    vector.body
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP4:%.*]])
@@ -206,8 +206,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.cos.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !81
@@ -221,7 +221,7 @@ for.end:
 !83 = !{!"llvm.loop.vectorize.enable", i1 true}
 
 
-define void @exp_f32(float* nocapture %varray) {
+define void @exp_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32
 ; CHECK-LABEL:    vector.body
 ; CHECK: <4 x float> @_ZGVbN4v_expf
@@ -233,8 +233,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @expf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91
@@ -247,7 +247,7 @@ for.end:                                          ; preds = %for.body
 !92 = !{!"llvm.loop.vectorize.width", i32 4}
 !93 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @exp_f32_intrin(float* nocapture %varray) {
+define void @exp_f32_intrin(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32_intrin
 ; CHECK-LABEL: vector.body
 ; CHECK: <4 x float> @_ZGVbN4v_expf
@@ -259,8 +259,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @llvm.exp.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !101
@@ -274,7 +274,7 @@ for.end:                                          ; preds = %for.body
 !103 = !{!"llvm.loop.vectorize.enable", i1 true}
 
 
-define void @log_f32(float* nocapture %varray) {
+define void @log_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32
 ; CHECK-LABEL: vector.body
 ; CHECK: <4 x float> @_ZGVbN4v_logf
@@ -286,8 +286,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @logf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !111
@@ -300,7 +300,7 @@ for.end:                                          ; preds = %for.body
 !112 = !{!"llvm.loop.vectorize.width", i32 4}
 !113 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
+define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32
 ; CHECK-LABEL:    vector.body
 ; CHECK: <4 x float> @_ZGVbN4vv_powf
@@ -311,11 +311,11 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
-  %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv
-  %tmp1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %indvars.iv
+  %tmp1 = load float, ptr %arrayidx, align 4
   %tmp2 = tail call fast float @powf(float %conv, float %tmp1)
-  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %tmp2, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %tmp2, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !121
@@ -328,7 +328,7 @@ for.end:                                          ; preds = %for.body
 !122 = !{!"llvm.loop.vectorize.width", i32 4}
 !123 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-define void @pow_f32_intrin(float* nocapture %varray, float* nocapture readonly %exp) {
+define void @pow_f32_intrin(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32_intrin
 ; CHECK-LABEL:    vector.body
 ; CHECK: <4 x float> @_ZGVbN4vv_powf
@@ -339,11 +339,11 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
-  %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv
-  %tmp1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %indvars.iv
+  %tmp1 = load float, ptr %arrayidx, align 4
   %tmp2 = tail call fast float @llvm.pow.f32(float %conv, float %tmp1)
-  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %tmp2, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %tmp2, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !131

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll b/llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll
index 8a1bcacd793c0..760b10cbc3a43 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll
@@ -25,15 +25,15 @@ entry:
 
 for.body:                                         ; preds = %for.inc, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
-  %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* @b, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds [256 x i32], [256 x i32]* @a, i64 0, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds [256 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds [256 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %cmp3 = icmp ugt i32 %0, %1
   br i1 %cmp3, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  store i32 %0, i32* %arrayidx2, align 4
+  store i32 %0, ptr %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/llvm/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
index 7d9be8babab23..562f0a0557eaf 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
@@ -4,16 +4,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK: <4 x float>
-define void @trivial_loop(float* nocapture %a) nounwind uwtable optsize {
+define void @trivial_loop(ptr nocapture %a) nounwind uwtable optsize {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %add = fadd float %0, 1.000000e+00
-  store float %add, float* %arrayidx, align 4
+  store float %add, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 8

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll b/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
index 1788e884821f1..654376cc54f4a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define i8 @mul_i8(i8* %dataA, i8* %dataB, i32 %N) {
+define i8 @mul_i8(ptr %dataA, ptr %dataB, i32 %N) {
 entry:
   %cmp12 = icmp eq i32 %N, 0
   br i1 %cmp12, label %for.cond.cleanup, label %for.body.preheader
@@ -24,11 +24,11 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %acc.013 = phi i32 [ %add4, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i8, i8* %dataA, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %dataA, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = sext i8 %0 to i32
-  %arrayidx2 = getelementptr inbounds i8, i8* %dataB, i64 %indvars.iv
-  %1 = load i8, i8* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %dataB, i64 %indvars.iv
+  %1 = load i8, ptr %arrayidx2, align 1
   %conv3 = sext i8 %1 to i32
 ; sources of the mul is sext\sext from i8
 ; use pmullw\sext seq.
@@ -73,7 +73,7 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
 }
 
-define i16 @mul_i16(i16* %dataA, i16* %dataB, i32 %N) {
+define i16 @mul_i16(ptr %dataA, ptr %dataB, i32 %N) {
 entry:
   %cmp12 = icmp eq i32 %N, 0
   br i1 %cmp12, label %for.cond.cleanup, label %for.body.preheader
@@ -93,11 +93,11 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %acc.013 = phi i32 [ %add4, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i16, i16* %dataA, i64 %indvars.iv
-  %0 = load i16, i16* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i16, ptr %dataA, i64 %indvars.iv
+  %0 = load i16, ptr %arrayidx, align 1
   %conv = sext i16 %0 to i32
-  %arrayidx2 = getelementptr inbounds i16, i16* %dataB, i64 %indvars.iv
-  %1 = load i16, i16* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds i16, ptr %dataB, i64 %indvars.iv
+  %1 = load i16, ptr %arrayidx2, align 1
   %conv3 = sext i16 %1 to i32
 ; sources of the mul is sext\sext from i16
 ; use pmulhw\pmullw\pshuf seq.

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/no-vector.ll b/llvm/test/Transforms/LoopVectorize/X86/no-vector.ll
index 01c3c7997e879..91dc2afb1c8a2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/no-vector.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/no-vector.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -S -mtriple=i386-unknown-freebsd -mcpu=i486 -passes=loop-vectorize < %s
 
-define i32 @PR14639(i8* nocapture %s, i32 %len) nounwind {
+define i32 @PR14639(ptr nocapture %s, i32 %len) nounwind {
 entry:
   %cmp4 = icmp sgt i32 %len, 0
   br i1 %cmp4, label %for.body, label %for.end
@@ -8,8 +8,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.05 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %s, i32 %i.06
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %s, i32 %i.06
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = sext i8 %0 to i32
   %xor = xor i32 %conv, %r.05
   %inc = add nsw i32 %i.06, 1

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll
index ca6576d84cd16..5aedc585b0629 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath.ll
@@ -8,7 +8,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.10.0"
 
 ; Function Attrs: nounwind readonly ssp uwtable
-define double @cond_sum(i32* nocapture readonly %v, i32 %n) #0 !dbg !4 {
+define double @cond_sum(ptr nocapture readonly %v, i32 %n) #0 !dbg !4 {
 entry:
   %cmp.7 = icmp sgt i32 %n, 0, !dbg !3
   br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !8
@@ -27,8 +27,8 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !9
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !9, !tbaa !11
+  %arrayidx = getelementptr inbounds i32, ptr %v, i64 %indvars.iv, !dbg !9
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !9, !tbaa !11
   %cmp1 = icmp eq i32 %0, 0, !dbg !15
   %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !9
   %add = fadd double %a.08, %cond, !dbg !16
@@ -39,7 +39,7 @@ for.body:                                         ; preds = %for.body.preheader,
 }
 
 ; Function Attrs: nounwind readonly ssp uwtable
-define double @cond_sum_loop_hint(i32* nocapture readonly %v, i32 %n) #0 !dbg !20 {
+define double @cond_sum_loop_hint(ptr nocapture readonly %v, i32 %n) #0 !dbg !20 {
 entry:
   %cmp.7 = icmp sgt i32 %n, 0, !dbg !19
   br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !21
@@ -58,8 +58,8 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !22
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !11
+  %arrayidx = getelementptr inbounds i32, ptr %v, i64 %indvars.iv, !dbg !22
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !22, !tbaa !11
   %cmp1 = icmp eq i32 %0, 0, !dbg !24
   %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !22
   %add = fadd double %a.08, %cond, !dbg !25

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll
index c17a829503291..b1fc96ea77ed0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll
@@ -8,7 +8,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.10.0"
 
 ; Function Attrs: nounwind readonly ssp uwtable
-define double @cond_sum(i32* nocapture readonly %v, i32 %n) #0 !dbg !4 !prof !29 {
+define double @cond_sum(ptr nocapture readonly %v, i32 %n) #0 !dbg !4 !prof !29 {
 entry:
   %cmp.7 = icmp sgt i32 %n, 0, !dbg !3
   br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !8, !prof !30
@@ -27,8 +27,8 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !9
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !9, !tbaa !11
+  %arrayidx = getelementptr inbounds i32, ptr %v, i64 %indvars.iv, !dbg !9
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !9, !tbaa !11
   %cmp1 = icmp eq i32 %0, 0, !dbg !15
   %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !9
   %add = fadd double %a.08, %cond, !dbg !16
@@ -39,7 +39,7 @@ for.body:                                         ; preds = %for.body.preheader,
 }
 
 ; Function Attrs: nounwind readonly ssp uwtable
-define double @cond_sum_loop_hint(i32* nocapture readonly %v, i32 %n) #0 !dbg !20 !prof !29{
+define double @cond_sum_loop_hint(ptr nocapture readonly %v, i32 %n) #0 !dbg !20 !prof !29{
 entry:
   %cmp.7 = icmp sgt i32 %n, 0, !dbg !19
   br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !21, !prof !30
@@ -58,8 +58,8 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !22
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !11
+  %arrayidx = getelementptr inbounds i32, ptr %v, i64 %indvars.iv, !dbg !22
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !22, !tbaa !11
   %cmp1 = icmp eq i32 %0, 0, !dbg !24
   %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !22
   %add = fadd double %a.08, %cond, !dbg !25

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/nontemporal.ll b/llvm/test/Transforms/LoopVectorize/X86/nontemporal.ll
index ed35412849e6a..53c0e0876f880 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/nontemporal.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/nontemporal.ll
@@ -26,7 +26,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64"
 
 ; CHECK-LABEL: @vectorTest(
-define void @vectorTest(i32* noalias readonly %src, i32* noalias %dst, i32 %nElts) {
+define void @vectorTest(ptr noalias readonly %src, ptr noalias %dst, i32 %nElts) {
 entry:
   %cmp8 = icmp eq i32 %nElts, 0
   br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
@@ -41,13 +41,13 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 for.body:                                         ; preds = %for.body, %for.body.preheader
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
 ; Check that we vectorized the load, and that there is no nontemporal hint.
-; CHECK: %wide.load = load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 4{{$}}
-  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+; CHECK: %wide.load = load <4 x i32>, ptr %{{[0-9]+}}, align 4{{$}}
+  %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
 ; Check that we vectorized the store, and that there is no nontemporal hint.
-; CHECK: store <4 x i32> %wide.load, <4 x i32>* %{{[0-9]+}}, align 4{{$}}
-  %arrayidx2 = getelementptr inbounds i32, i32* %dst, i64 %indvars.iv
-  store i32 %0, i32* %arrayidx2, align 4
+; CHECK: store <4 x i32> %wide.load, ptr %{{[0-9]+}}, align 4{{$}}
+  %arrayidx2 = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv
+  store i32 %0, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -56,7 +56,7 @@ for.body:                                         ; preds = %for.body, %for.body
 ; CHECK-LABEL: @vectorNTStoreTest(
 ; Check that the vectorized type of the store does not appear.
 ; CHECK-NOT: 4 x i32
-define void @vectorNTStoreTest(i32* noalias readonly %src, i32* noalias %dst, i32 %nElts) {
+define void @vectorNTStoreTest(ptr noalias readonly %src, ptr noalias %dst, i32 %nElts) {
 entry:
   %cmp8 = icmp eq i32 %nElts, 0
   br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
@@ -70,12 +70,12 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %for.body, %for.body.preheader
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %dst, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv
 ; Check that the store is not vectorized and that we don't lose the !nontemporal hint in it.
-; CHECK: store i32 %{{[0-9]+}}, i32* %arrayidx2, align 4, !nontemporal !4
-  store i32 %0, i32* %arrayidx2, align 4, !nontemporal !0
+; CHECK: store i32 %{{[0-9]+}}, ptr %arrayidx2, align 4, !nontemporal !4
+  store i32 %0, ptr %arrayidx2, align 4, !nontemporal !0
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -84,7 +84,7 @@ for.body:                                         ; preds = %for.body, %for.body
 ; CHECK-LABEL: @vectorNTLoadTest(
 ; Check that the vectorized type of the load does not appear.
 ; CHECK-NOT: 4 x i32
-define void @vectorNTLoadTest(i32* noalias readonly %src, i32* noalias %dst, i32 %nElts) {
+define void @vectorNTLoadTest(ptr noalias readonly %src, ptr noalias %dst, i32 %nElts) {
 entry:
   %cmp8 = icmp eq i32 %nElts, 0
   br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
@@ -98,12 +98,12 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %for.body, %for.body.preheader
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
 ; Check that the load is not vectorized and that we don't lose the !nontemporal hint in it.
-; CHECK: load i32, i32* %arrayidx, align 4, !nontemporal !4
-  %0 = load i32, i32* %arrayidx, align 4, !nontemporal !0
-  %arrayidx2 = getelementptr inbounds i32, i32* %dst, i64 %indvars.iv
-  store i32 %0, i32* %arrayidx2, align 4
+; CHECK: load i32, ptr %arrayidx, align 4, !nontemporal !4
+  %0 = load i32, ptr %arrayidx, align 4, !nontemporal !0
+  %arrayidx2 = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv
+  store i32 %0, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.cond.cleanup, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
index e286debf32af2..fb232b14a88f4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
@@ -25,17 +25,17 @@
 ; CHECK-LABEL: vector.body:
 ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
 ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
-; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, <4 x i64> %[[VecInd]]
+; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]]
 ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
-; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[VecIndTr]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 ; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
 ; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]]
 ; CHECK: br label %[[InnerLoop:.+]]
 
 ; CHECK: [[InnerLoop]]:
 ; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]
-; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]
-; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[StoreVal]], <4 x i32*> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true
+; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true
 ; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1>
 ; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8>
 ; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0
@@ -54,17 +54,17 @@
 ; AVX-LABEL: vector.body:
 ; AVX: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
 ; AVX: %[[VecInd:.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
-; AVX: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, <8 x i64> %[[VecInd]]
+; AVX: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <8 x i64> %[[VecInd]]
 ; AVX: %[[VecIndTr:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32>
-; AVX: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %[[VecIndTr]], <8 x i32*> %[[AAddr]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[VecIndTr]], <8 x ptr> %[[AAddr]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; AVX: %[[VecIndTr2:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32>
 ; AVX: %[[StoreVal:.*]] = add nsw <8 x i32> %[[VecIndTr2]], %[[Splat]]
 ; AVX: br label %[[InnerLoop:.+]]
 
 ; AVX: [[InnerLoop]]:
 ; AVX: %[[InnerPhi:.*]] = phi <8 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]
-; AVX: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, <8 x i64> %[[InnerPhi]], <8 x i64> %[[VecInd]]
-; AVX: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %[[StoreVal]], <8 x i32*> %[[AAddr2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true
+; AVX: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <8 x i64> %[[InnerPhi]], <8 x i64> %[[VecInd]]
+; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[StoreVal]], <8 x ptr> %[[AAddr2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true
 ; AVX: %[[InnerPhiNext]] = add nuw nsw <8 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
 ; AVX: %[[VecCond:.*]] = icmp eq <8 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
 ; AVX: %[[InnerCond:.*]] = extractelement <8 x i1> %[[VecCond]], i32 0
@@ -85,17 +85,17 @@ entry:
 
 for.body:                                         ; preds = %for.inc8, %entry
   %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
-  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, i64 %indvars.iv21
+  %arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %indvars.iv21
   %0 = trunc i64 %indvars.iv21 to i32
-  store i32 %0, i32* %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4
   %1 = trunc i64 %indvars.iv21 to i32
   %add = add nsw i32 %1, %n
   br label %for.body3
 
 for.body3:                                        ; preds = %for.body3, %for.body
   %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
-  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
-  store i32 %add, i32* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+  store i32 %add, ptr %arrayidx7, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 8
   br i1 %exitcond, label %for.inc8, label %for.body3

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
index cbd2972e8efdb..e023ceb807bf5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -8,38 +8,38 @@ target triple = "x86_64-unknown-linux-gnu"
 ; now non-vectorizable.
 
 ;CHECK-NOT: <4 x i32>
-define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+define void @parallel_loop(ptr nocapture %a, ptr nocapture %b) nounwind uwtable {
 entry:
   %indvars.iv.next.reg2mem = alloca i64
   %indvars.iv.reg2mem = alloca i64
   %"reg2mem alloca point" = bitcast i32 0 to i32
-  store i64 0, i64* %indvars.iv.reg2mem
+  store i64 0, ptr %indvars.iv.reg2mem
   br label %for.body
 
 for.body:                                         ; preds = %for.body.for.body_crit_edge, %entry
-  %indvars.iv.reload = load i64, i64* %indvars.iv.reg2mem
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.reload
-  %0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !4
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.reload
-  %1 = load i32, i32* %arrayidx2, align 4, !llvm.access.group !4
+  %indvars.iv.reload = load i64, ptr %indvars.iv.reg2mem
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv.reload
+  %0 = load i32, ptr %arrayidx, align 4, !llvm.access.group !4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.reload
+  %1 = load i32, ptr %arrayidx2, align 4, !llvm.access.group !4
   %idxprom3 = sext i32 %1 to i64
-  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
-  store i32 %0, i32* %arrayidx4, align 4, !llvm.access.group !4
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %idxprom3
+  store i32 %0, ptr %arrayidx4, align 4, !llvm.access.group !4
   %indvars.iv.next = add i64 %indvars.iv.reload, 1
   ; A new store without the parallel metadata here:
-  store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem
-  %indvars.iv.next.reload1 = load i64, i64* %indvars.iv.next.reg2mem
-  %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next.reload1
-  %2 = load i32, i32* %arrayidx6, align 4, !llvm.access.group !4
-  store i32 %2, i32* %arrayidx2, align 4, !llvm.access.group !4
-  %indvars.iv.next.reload = load i64, i64* %indvars.iv.next.reg2mem
+  store i64 %indvars.iv.next, ptr %indvars.iv.next.reg2mem
+  %indvars.iv.next.reload1 = load i64, ptr %indvars.iv.next.reg2mem
+  %arrayidx6 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv.next.reload1
+  %2 = load i32, ptr %arrayidx6, align 4, !llvm.access.group !4
+  store i32 %2, ptr %arrayidx2, align 4, !llvm.access.group !4
+  %indvars.iv.next.reload = load i64, ptr %indvars.iv.next.reg2mem
   %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
   br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !llvm.loop !3
 
 for.body.for.body_crit_edge:                      ; preds = %for.body
-  %indvars.iv.next.reload2 = load i64, i64* %indvars.iv.next.reg2mem
-  store i64 %indvars.iv.next.reload2, i64* %indvars.iv.reg2mem
+  %indvars.iv.next.reload2 = load i64, ptr %indvars.iv.next.reg2mem
+  store i64 %indvars.iv.next.reload2, ptr %indvars.iv.reg2mem
   br label %for.body
 
 for.end:                                          ; preds = %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll
index 9a3abfc24c575..fd6b13abd740a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -13,23 +13,23 @@ target triple = "x86_64-unknown-linux-gnu"
 ;    }
 ;}
 
-define void @loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+define void @loop(ptr nocapture %a, ptr nocapture %b) nounwind uwtable {
 ; CHECK-LABEL: @loop(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[IDXPROM3:%.*]] = sext i32 [[TMP1]] to i64
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IDXPROM3]]
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM3]]
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP3]], 512
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
@@ -41,17 +41,17 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %idxprom3 = sext i32 %1 to i64
-  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
-  store i32 %0, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %idxprom3
+  store i32 %0, ptr %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
-  %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
-  %2 = load i32, i32* %arrayidx6, align 4
-  store i32 %2, i32* %arrayidx2, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv.next
+  %2 = load i32, ptr %arrayidx6, align 4
+  store i32 %2, ptr %arrayidx2, align 4
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
   br i1 %exitcond, label %for.end, label %for.body
@@ -63,7 +63,7 @@ for.end:                                          ; preds = %for.body
 ; The same loop with parallel loop metadata added to the loop branch
 ; and the memory instructions.
 
-define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+define void @parallel_loop(ptr nocapture %a, ptr nocapture %b) nounwind uwtable {
 ; CHECK-LABEL: @parallel_loop(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -74,39 +74,36 @@ define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtabl
 ; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4, !llvm.access.group [[ACC_GRP0:![0-9]+]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP6]], align 4, !llvm.access.group [[ACC_GRP0]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP7]], align 4, !llvm.access.group [[ACC_GRP0]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP0]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4, !llvm.access.group [[ACC_GRP0]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = sext i32 [[TMP9]] to i64
 ; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP10]] to i64
 ; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP11]] to i64
 ; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]]
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 0
-; CHECK-NEXT:    store i32 [[TMP21]], i32* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP1:![0-9]+]]
+; CHECK-NEXT:    store i32 [[TMP21]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP1:![0-9]+]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 1
-; CHECK-NEXT:    store i32 [[TMP22]], i32* [[TMP18]], align 4, !llvm.access.group [[ACC_GRP1]]
+; CHECK-NEXT:    store i32 [[TMP22]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP1]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 2
-; CHECK-NEXT:    store i32 [[TMP23]], i32* [[TMP19]], align 4, !llvm.access.group [[ACC_GRP1]]
+; CHECK-NEXT:    store i32 [[TMP23]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP1]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3
-; CHECK-NEXT:    store i32 [[TMP24]], i32* [[TMP20]], align 4, !llvm.access.group [[ACC_GRP1]]
+; CHECK-NEXT:    store i32 [[TMP24]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP1]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP25]]
-; CHECK-NEXT:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP27]], align 4, !llvm.access.group [[ACC_GRP0]]
-; CHECK-NEXT:    [[TMP28:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[WIDE_LOAD1]], <4 x i32>* [[TMP28]], align 4, !llvm.access.group [[ACC_GRP0]]
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP0]]
+; CHECK-NEXT:    store <4 x i32> [[WIDE_LOAD1]], ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
 ; CHECK-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -124,19 +121,19 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !13
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4, !llvm.access.group !13
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4, !llvm.access.group !13
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4, !llvm.access.group !13
   %idxprom3 = sext i32 %1 to i64
-  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %idxprom3
   ; This store might have originated from inlining a function with a parallel
   ; loop. Refers to a list with the "original loop reference" (!4) also included.
-  store i32 %0, i32* %arrayidx4, align 4, !llvm.access.group !15
+  store i32 %0, ptr %arrayidx4, align 4, !llvm.access.group !15
   %indvars.iv.next = add i64 %indvars.iv, 1
-  %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
-  %2 = load i32, i32* %arrayidx6, align 4, !llvm.access.group !13
-  store i32 %2, i32* %arrayidx2, align 4, !llvm.access.group !13
+  %arrayidx6 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv.next
+  %2 = load i32, ptr %arrayidx6, align 4, !llvm.access.group !13
+  store i32 %2, ptr %arrayidx2, align 4, !llvm.access.group !13
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
@@ -149,23 +146,23 @@ for.end:                                          ; preds = %for.body
 ; accesses refer to a different loop's identifier.
 
 
-define void @mixed_metadata(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+define void @mixed_metadata(ptr nocapture %a, ptr nocapture %b) nounwind uwtable {
 ; CHECK-LABEL: @mixed_metadata(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP7]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP7]]
 ; CHECK-NEXT:    [[IDXPROM3:%.*]] = sext i32 [[TMP1]] to i64
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IDXPROM3]]
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM3]]
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP7]]
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP7]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP7]]
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP7]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP3]], 512
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
@@ -177,19 +174,19 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !16
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4, !llvm.access.group !16
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4, !llvm.access.group !16
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4, !llvm.access.group !16
   %idxprom3 = sext i32 %1 to i64
-  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %idxprom3
   ; This refers to the loop marked with !7 which we are not in at the moment.
   ; It should prevent detecting as a parallel loop.
-  store i32 %0, i32* %arrayidx4, align 4, !llvm.access.group !17
+  store i32 %0, ptr %arrayidx4, align 4, !llvm.access.group !17
   %indvars.iv.next = add i64 %indvars.iv, 1
-  %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
-  %2 = load i32, i32* %arrayidx6, align 4, !llvm.access.group !16
-  store i32 %2, i32* %arrayidx2, align 4, !llvm.access.group !16
+  %arrayidx6 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv.next
+  %2 = load i32, ptr %arrayidx6, align 4, !llvm.access.group !16
+  store i32 %2, ptr %arrayidx2, align 4, !llvm.access.group !16
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6

diff --git a/llvm/test/Transforms/LoopVectorize/X86/powof2div.ll b/llvm/test/Transforms/LoopVectorize/X86/powof2div.ll
index 10f32f9f4a1d1..65ddd52645748 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/powof2div.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/powof2div.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
 @Foo = common global %struct.anon zeroinitializer, align 4
 
 ; CHECK-LABEL: @foo(
-; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: load <4 x i32>, ptr
 ; CHECK: sdiv <4 x i32>
 ; CHECK: store <4 x i32>
 
@@ -17,11 +17,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i64 0, i32 2, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %div = sdiv i32 %0, 2
-  %arrayidx2 = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv
-  store i32 %div, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds %struct.anon, ptr @Foo, i64 0, i32 0, i64 %indvars.iv
+  store i32 %div, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 100
   br i1 %exitcond, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
index f5021101df3a7..04142da386dcb 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
@@ -5,15 +5,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
 target triple = "x86_64-unknown-linux-gnu"
 
 ; Ensure that the 'inbounds' is preserved on the GEPs that feed the load and store in the loop.
-define void @foo(i8 addrspace(1)* align 8 dereferenceable_or_null(16), i8 addrspace(1)* align 8 dereferenceable_or_null(8), i64) #0 {
+define void @foo(ptr addrspace(1) align 8 dereferenceable_or_null(16), ptr addrspace(1) align 8 dereferenceable_or_null(8), i64) #0 {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[PREHEADER:%.*]]
 ; CHECK:       preheader:
-; CHECK-NEXT:    [[DOT10:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP0:%.*]], i64 16
-; CHECK-NEXT:    [[DOT11:%.*]] = bitcast i8 addrspace(1)* [[DOT10]] to i8 addrspace(1)* addrspace(1)*
-; CHECK-NEXT:    [[DOT12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP1:%.*]], i64 16
-; CHECK-NEXT:    [[DOT13:%.*]] = bitcast i8 addrspace(1)* [[DOT12]] to i8 addrspace(1)* addrspace(1)*
+; CHECK-NEXT:    [[DOT10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0:%.*]], i64 16
+; CHECK-NEXT:    [[DOT12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 16
 ; CHECK-NEXT:    [[UMAX2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP2:%.*]], i64 1)
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX2]], 16
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
@@ -21,10 +19,10 @@ define void @foo(i8 addrspace(1)* align 8 dereferenceable_or_null(16), i8 addrsp
 ; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP2]], i64 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[UMAX]], 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], 16
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP0]], i64 [[TMP4]]
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP1]], i64 [[TMP4]]
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8 addrspace(1)* [[DOT10]], [[SCEVGEP1]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8 addrspace(1)* [[DOT12]], [[SCEVGEP]]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 [[TMP4]]
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP1]], i64 [[TMP4]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr addrspace(1) [[DOT10]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr addrspace(1) [[DOT12]], [[SCEVGEP]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
@@ -32,30 +30,22 @@ define void @foo(i8 addrspace(1)* align 8 dereferenceable_or_null(16), i8 addrsp
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT13]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP5]] to <4 x i8 addrspace(1)*> addrspace(1)*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP6]], align 8, !alias.scope !0
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP5]], i64 4
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP7]] to <4 x i8 addrspace(1)*> addrspace(1)*
-; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP8]], align 8, !alias.scope !0
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP5]], i64 8
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP9]] to <4 x i8 addrspace(1)*> addrspace(1)*
-; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP10]], align 8, !alias.scope !0
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP5]], i64 12
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP11]] to <4 x i8 addrspace(1)*> addrspace(1)*
-; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP12]], align 8, !alias.scope !0
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT11]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP13]] to <4 x i8 addrspace(1)*> addrspace(1)*
-; CHECK-NEXT:    store <4 x i8 addrspace(1)*> [[WIDE_LOAD]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP14]], align 8, !alias.scope !3, !noalias !0
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP13]], i64 4
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP15]] to <4 x i8 addrspace(1)*> addrspace(1)*
-; CHECK-NEXT:    store <4 x i8 addrspace(1)*> [[WIDE_LOAD3]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP16]], align 8, !alias.scope !3, !noalias !0
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP13]], i64 8
-; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP17]] to <4 x i8 addrspace(1)*> addrspace(1)*
-; CHECK-NEXT:    store <4 x i8 addrspace(1)*> [[WIDE_LOAD4]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP18]], align 8, !alias.scope !3, !noalias !0
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP13]], i64 12
-; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP19]] to <4 x i8 addrspace(1)*> addrspace(1)*
-; CHECK-NEXT:    store <4 x i8 addrspace(1)*> [[WIDE_LOAD5]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP20]], align 8, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT12]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP5]], align 8, !alias.scope !0
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 4
+; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP7]], align 8, !alias.scope !0
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 8
+; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP9]], align 8, !alias.scope !0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 12
+; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP11]], align 8, !alias.scope !0
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT10]], i64 [[INDEX]]
+; CHECK-NEXT:    store <4 x ptr addrspace(1)> [[WIDE_LOAD]], ptr addrspace(1) [[TMP13]], align 8, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP13]], i64 4
+; CHECK-NEXT:    store <4 x ptr addrspace(1)> [[WIDE_LOAD3]], ptr addrspace(1) [[TMP15]], align 8, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT:    store <4 x ptr addrspace(1)> [[WIDE_LOAD4]], ptr addrspace(1) [[TMP17]], align 8, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP13]], i64 12
+; CHECK-NEXT:    store <4 x ptr addrspace(1)> [[WIDE_LOAD5]], ptr addrspace(1) [[TMP19]], align 8, !alias.scope !3, !noalias !0
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -67,10 +57,10 @@ define void @foo(i8 addrspace(1)* align 8 dereferenceable_or_null(16), i8 addrsp
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT4:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[DOT18:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT13]], i64 [[INDVARS_IV3]]
-; CHECK-NEXT:    [[V:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT18]], align 8
-; CHECK-NEXT:    [[DOT20:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT11]], i64 [[INDVARS_IV3]]
-; CHECK-NEXT:    store i8 addrspace(1)* [[V]], i8 addrspace(1)* addrspace(1)* [[DOT20]], align 8
+; CHECK-NEXT:    [[DOT18:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT12]], i64 [[INDVARS_IV3]]
+; CHECK-NEXT:    [[V:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[DOT18]], align 8
+; CHECK-NEXT:    [[DOT20:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT10]], i64 [[INDVARS_IV3]]
+; CHECK-NEXT:    store ptr addrspace(1) [[V]], ptr addrspace(1) [[DOT20]], align 8
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT4]] = add nuw nsw i64 [[INDVARS_IV3]], 1
 ; CHECK-NEXT:    [[DOT21:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT4]], [[TMP2]]
 ; CHECK-NEXT:    br i1 [[DOT21]], label [[LOOP]], label [[LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -81,18 +71,16 @@ entry:
   br label %preheader
 
 preheader:                                       ; preds = %4
-  %.10 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 16
-  %.11 = bitcast i8 addrspace(1)* %.10 to i8 addrspace(1)* addrspace(1)*
-  %.12 = getelementptr inbounds i8, i8 addrspace(1)* %1, i64 16
-  %.13 = bitcast i8 addrspace(1)* %.12 to i8 addrspace(1)* addrspace(1)*
+  %.10 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 16
+  %.12 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 16
   br label %loop
 
 loop:
   %indvars.iv3 = phi i64 [ 0, %preheader ], [ %indvars.iv.next4, %loop ]
-  %.18 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %.13, i64 %indvars.iv3
-  %v = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %.18, align 8
-  %.20 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %.11, i64 %indvars.iv3
-  store i8 addrspace(1)* %v, i8 addrspace(1)* addrspace(1)* %.20, align 8
+  %.18 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %.12, i64 %indvars.iv3
+  %v = load ptr addrspace(1), ptr addrspace(1) %.18, align 8
+  %.20 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %.10, i64 %indvars.iv3
+  store ptr addrspace(1) %v, ptr addrspace(1) %.20, align 8
   %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
   %.21 = icmp ult i64 %indvars.iv.next4, %2
   br i1 %.21, label %loop, label %loopexit

diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr39160.ll b/llvm/test/Transforms/LoopVectorize/X86/pr39160.ll
index 8e414a35f96b9..ba777f52f532b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr39160.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr39160.ll
@@ -64,7 +64,7 @@ bb68:                                             ; preds = %bb62
   br label %bb62
 }
 
-define i32 @foo(i32 addrspace(1)* %p) {
+define i32 @foo(ptr addrspace(1) %p) {
 
 ; CHECK-LABEL: foo
 ; CHECK:       middle.block:
@@ -88,7 +88,7 @@ inner:                                            ; preds = %inner, %outer
   br i1 %5, label %inner, label %outer_latch
 
 outer_latch:                                      ; preds = %inner
-  store atomic i32 %2, i32 addrspace(1)* %p unordered, align 4
+  store atomic i32 %2, ptr addrspace(1) %p unordered, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %6 = icmp ugt i64 %iv, 63
   br i1 %6, label %exit, label %outer

diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll b/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll
index 0b13616f80547..8f89581bf3dbc 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll
@@ -14,12 +14,10 @@ define zeroext i8 @sum() {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <64 x i8> [ zeroinitializer, [[ITER_CHECK]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <64 x i8> [ zeroinitializer, [[ITER_CHECK]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [128 x i8], [128 x i8]* @bytes, i64 0, i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <64 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <64 x i8>, <64 x i8>* [[TMP1]], align 16
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 64
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <64 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <64 x i8>, <64 x i8>* [[TMP3]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [128 x i8], ptr @bytes, i64 0, i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <64 x i8>, ptr [[TMP0]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 64
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <64 x i8>, ptr [[TMP2]], align 16
 ; CHECK-NEXT:    [[TMP4]] = add <64 x i8> [[WIDE_LOAD]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP5]] = add <64 x i8> [[WIDE_LOAD2]], [[VEC_PHI1]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128
@@ -36,8 +34,8 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %r.010 = phi i8 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds [128 x i8], [128 x i8]* @bytes, i64 0, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [128 x i8], ptr @bytes, i64 0, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1
   %add = add i8 %0, %r.010
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 128

diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54413-select-interleave-count-loop-with-cost-zero.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54413-select-interleave-count-loop-with-cost-zero.ll
index 675a7eaa07425..ecab2e6bf4aec 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr54413-select-interleave-count-loop-with-cost-zero.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr54413-select-interleave-count-loop-with-cost-zero.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; After indvars, the backedge taken count for %loop2 becomes 1, but SCEV
 ; retains the cached original BTC, as the loop is in dead code. Make sure
 ; LV does not crash when trying to select an interleave count for a loop with zero cost.
-define void @pr54413(i64* %ptr.base) {
+define void @pr54413(ptr %ptr.base) {
 ; CHECK-LABEL: @pr54413(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP1:%.*]]
@@ -15,10 +15,10 @@ define void @pr54413(i64* %ptr.base) {
 ; CHECK:       loop2.preheader:
 ; CHECK-NEXT:    br label [[LOOP2:%.*]]
 ; CHECK:       loop2:
-; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr inbounds i64, i64* [[PTR_BASE:%.*]], i64 1
+; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr inbounds i64, ptr [[PTR_BASE:%.*]], i64 1
 ; CHECK-NEXT:    br i1 true, label [[LOOP2_EXIT:%.*]], label [[LOOP2]]
 ; CHECK:       loop2.exit:
-; CHECK-NEXT:    [[PTR_NEXT_LCSSA:%.*]] = phi i64* [ [[PTR_NEXT]], [[LOOP2]] ]
+; CHECK-NEXT:    [[PTR_NEXT_LCSSA:%.*]] = phi ptr [ [[PTR_NEXT]], [[LOOP2]] ]
 ; CHECK-NEXT:    br label [[LOOP1_LATCH]]
 ; CHECK:       loop1.latch:
 ; CHECK-NEXT:    br label [[LOOP1]]
@@ -34,14 +34,14 @@ loop2.preheader:
 
 loop2:
   %iv = phi i64 [ 0, %loop2.preheader ], [ %iv.next, %loop2 ]
-  %ptr = phi i64* [ %ptr.base, %loop2.preheader ], [ %ptr.next, %loop2 ]
+  %ptr = phi ptr [ %ptr.base, %loop2.preheader ], [ %ptr.next, %loop2 ]
   %iv.next = add nuw nsw i64 %iv, 1
-  %ptr.next = getelementptr inbounds i64, i64* %ptr, i64 1
+  %ptr.next = getelementptr inbounds i64, ptr %ptr, i64 1
   %cmp = icmp eq i64 %iv, 1024
   br i1 %cmp, label %loop2.exit, label %loop2
 
 loop2.exit:
-  %ptr.next.lcssa = phi i64* [ %ptr.next, %loop2 ]
+  %ptr.next.lcssa = phi ptr [ %ptr.next, %loop2 ]
   br label %loop1.latch
 
 loop1.latch:

diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll
index 941c2711d0c27..60eac3e380241 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i386-apple-darwin"
 
 ; PR15344
-define void @test1(float* nocapture %arg, i32 %arg1) nounwind {
+define void @test1(ptr nocapture %arg, i32 %arg1) nounwind {
 ; CHECK-LABEL: @test1(
 ; CHECK: preheader
 ; CHECK: insertelement <2 x double> zeroinitializer, double %tmp, i32 0
@@ -14,18 +14,18 @@ bb:
   br label %bb2
 
 bb2:                                              ; preds = %bb
-  %tmp = load double, double* null, align 8
+  %tmp = load double, ptr null, align 8
   br i1 undef, label %bb3, label %bb12
 
 bb3:                                              ; preds = %bb3, %bb2
   %tmp4 = phi double [ %tmp9, %bb3 ], [ %tmp, %bb2 ]
   %tmp5 = phi i32 [ %tmp8, %bb3 ], [ 0, %bb2 ]
-  %tmp6 = getelementptr inbounds [16 x double], [16 x double]* undef, i32 0, i32 %tmp5
-  %tmp7 = load double, double* %tmp6, align 4
+  %tmp6 = getelementptr inbounds [16 x double], ptr undef, i32 0, i32 %tmp5
+  %tmp7 = load double, ptr %tmp6, align 4
   %tmp8 = add nsw i32 %tmp5, 1
   %tmp9 = fadd fast double %tmp4, undef
-  %tmp10 = getelementptr inbounds float, float* %arg, i32 %tmp5
-  store float undef, float* %tmp10, align 4
+  %tmp10 = getelementptr inbounds float, ptr %arg, i32 %tmp5
+  store float undef, ptr %tmp10, align 4
   %tmp11 = icmp eq i32 %tmp8, %arg1
   br i1 %tmp11, label %bb12, label %bb3
 

diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll
index 9606e966aa228..7d435cc85c9df 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll
@@ -44,7 +44,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction:   %{{.*}} = icmp
 ; CHECK: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction:   br
 ;
-define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
+define i8 @reduction_i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
 entry:
   %cmp.12 = icmp sgt i32 %n, 0
   br i1 %cmp.12, label %for.body.preheader, label %for.cond.cleanup
@@ -64,11 +64,11 @@ for.cond.cleanup:
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %sum.013 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i32
-  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
-  %1 = load i8, i8* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
+  %1 = load i8, ptr %arrayidx2, align 1
   %conv3 = zext i8 %1 to i32
   %conv4 = and i32 %sum.013, 255
   %add = add nuw nsw i32 %conv, %conv4

diff --git a/llvm/test/Transforms/LoopVectorize/X86/redundant-vf2-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/redundant-vf2-cost.ll
index 7163f441dba02..1660f1af06c6c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/redundant-vf2-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/redundant-vf2-cost.ll
@@ -10,17 +10,17 @@
 ; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF 2 For instruction:   store i32
 ; CHECK: LV: Vector loop of width 2 costs: {{[0-9]+}}.
 
-define i32 @foo(i32* %A, i32 %n) {
+define i32 @foo(ptr %A, i32 %n) {
 entry:
   %cmp3.i = icmp eq i32 %n, 0
   br i1 %cmp3.i, label %exit, label %for.body.i
 
 for.body.i:
   %iv = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
-  %ld_addr = getelementptr inbounds i32, i32* %A, i32 %iv
-  %0 = load i32, i32* %ld_addr, align 4
+  %ld_addr = getelementptr inbounds i32, ptr %A, i32 %iv
+  %0 = load i32, ptr %ld_addr, align 4
   %val = add i32 %0, 1
-  store i32 %val, i32* %ld_addr, align 4
+  store i32 %val, ptr %ld_addr, align 4
   %add.i = add nsw i32 %iv, 1
   %cmp.i = icmp eq i32 %add.i, %n
   br i1 %cmp.i, label %exit, label %for.body.i, !llvm.loop !0

diff --git a/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll b/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
index 079967de64f35..7041a6731919b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
@@ -28,9 +28,9 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
 ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
 
-define i32 @test_g(i32* nocapture readonly %a, i32 %n) local_unnamed_addr !dbg !6 {
+define i32 @test_g(ptr nocapture readonly %a, i32 %n) local_unnamed_addr !dbg !6 {
 entry:
-  tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !12, metadata !16), !dbg !17
+  tail call void @llvm.dbg.value(metadata ptr %a, i64 0, metadata !12, metadata !16), !dbg !17
   tail call void @llvm.dbg.value(metadata i32 %n, i64 0, metadata !13, metadata !16), !dbg !18
   tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !16), !dbg !19
   tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !14, metadata !16), !dbg !20
@@ -46,8 +46,8 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %r.08 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv, !dbg !27
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !27, !tbaa !28
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv, !dbg !27
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !27, !tbaa !28
   %add = add i32 %0, %r.08, !dbg !32
   tail call void @llvm.dbg.value(metadata i32 %add, i64 0, metadata !15, metadata !16), !dbg !19
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !33
@@ -70,7 +70,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
 ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
 
-define i32 @test(i32* nocapture readonly %a, i32 %n) local_unnamed_addr {
+define i32 @test(ptr nocapture readonly %a, i32 %n) local_unnamed_addr {
 entry:
   %cmp6 = icmp eq i32 %n, 0
   br i1 %cmp6, label %for.end, label %for.body.preheader
@@ -82,8 +82,8 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %r.08 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4, !tbaa !28
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4, !tbaa !28
   %add = add i32 %0, %r.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count

diff --git a/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll
index 200caf01d9c70..f6191cc53c971 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll
@@ -31,11 +31,11 @@ for.cond.cleanup:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i32
-  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
-  %1 = load i8, i8* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %indvars.iv
+  %1 = load i8, ptr %arrayidx2, align 1
   %conv3 = zext i8 %1 to i32
   %sub = sub nsw i32 %conv, %conv3
   %ispos = icmp sgt i32 %sub, -1
@@ -73,12 +73,12 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %tmp1 = add nsw i64 %indvars.iv, 3
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %tmp1
-  %tmp = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %tmp1
+  %tmp = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %tmp to i32
   %tmp2 = add nsw i64 %indvars.iv, 2
-  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %tmp2
-  %tmp3 = load i8, i8* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %tmp2
+  %tmp3 = load i8, ptr %arrayidx2, align 1
   %conv3 = zext i8 %tmp3 to i32
   %sub = sub nsw i32 %conv, %conv3
   %ispos = icmp sgt i32 %sub, -1
@@ -90,7 +90,7 @@ for.body:                                         ; preds = %for.body, %entry
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-define i64 @bar(i64* nocapture %a) {
+define i64 @bar(ptr nocapture %a) {
 ; CHECK-LABEL: bar
 ; CHECK:       LV(REG): VF = 2
 ; CHECK-NEXT: LV(REG): Found max usage: 2 item
@@ -108,10 +108,10 @@ for.cond.cleanup:
 for.body:
   %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
   %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %i.012
+  %0 = load i64, ptr %arrayidx, align 8
   %add = add nsw i64 %0, %i.012
-  store i64 %add, i64* %arrayidx, align 8
+  store i64 %add, ptr %arrayidx, align 8
   %add2 = add nsw i64 %add, %s.011
   %inc = add nuw nsw i64 %i.012, 1
   %exitcond = icmp eq i64 %inc, 1024
@@ -138,12 +138,12 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv
-  %tmp = load i64, i64* %arrayidx, align 8
-  %arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp
-  %tmp1 = load i32, i32* %arrayidx1, align 4
-  %arrayidx3 = getelementptr inbounds [0 x i32], [0 x i32]* @c, i64 0, i64 %indvars.iv
-  store i32 %tmp1, i32* %arrayidx3, align 4
+  %arrayidx = getelementptr inbounds [0 x i64], ptr @d, i64 0, i64 %indvars.iv
+  %tmp = load i64, ptr %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds [0 x i32], ptr @e, i64 0, i64 %tmp
+  %tmp1 = load i32, ptr %arrayidx1, align 4
+  %arrayidx3 = getelementptr inbounds [0 x i32], ptr @c, i64 0, i64 %indvars.iv
+  store i32 %tmp1, ptr %arrayidx3, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 10000
   br i1 %exitcond, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/X86/register-assumption.ll b/llvm/test/Transforms/LoopVectorize/X86/register-assumption.ll
index 7e2655bb550af..0e78193b86974 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/register-assumption.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/register-assumption.ll
@@ -8,7 +8,7 @@ entry:
   br label %loop_exit.dim.11.critedge
 
 loop_exit.dim.11.critedge:                        ; preds = %loop_body.dim.0
-  %ptrint = ptrtoint float* %alloca to i64
+  %ptrint = ptrtoint ptr %alloca to i64
   %maskedptr = and i64 %ptrint, 4
   %maskcond = icmp eq i64 %maskedptr, 0
   br label %loop_header.dim.017.preheader

diff --git a/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll b/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll
index 01c1ea6193c2d..af5c921c29149 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll
@@ -13,28 +13,28 @@ target triple = "x86_64-unknown-linux"
 ;CHECK-LABEL: func1x6(
 ;CHECK: <4 x i32>
 ;CHECK: ret
-define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
+define i32 @func1x6(ptr nocapture %out, ptr nocapture %A, ptr nocapture %B, ptr nocapture %C, ptr nocapture %D, ptr nocapture %E, ptr nocapture %F) {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %i.016 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.016
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %B, i64 %i.016
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.016
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %B, i64 %i.016
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %i.016
-  %2 = load i32, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %C, i64 %i.016
+  %2 = load i32, ptr %arrayidx2, align 4
   %add3 = add nsw i32 %add, %2
-  %arrayidx4 = getelementptr inbounds i32, i32* %E, i64 %i.016
-  %3 = load i32, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %E, i64 %i.016
+  %3 = load i32, ptr %arrayidx4, align 4
   %add5 = add nsw i32 %add3, %3
-  %arrayidx6 = getelementptr inbounds i32, i32* %F, i64 %i.016
-  %4 = load i32, i32* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %F, i64 %i.016
+  %4 = load i32, ptr %arrayidx6, align 4
   %add7 = add nsw i32 %add5, %4
-  %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %i.016
-  store i32 %add7, i32* %arrayidx8, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %out, i64 %i.016
+  store i32 %add7, ptr %arrayidx8, align 4
   %inc = add i64 %i.016, 1
   %exitcond = icmp eq i64 %inc, 256
   br i1 %exitcond, label %for.end, label %for.body
@@ -47,39 +47,39 @@ for.end:                                          ; preds = %for.body
 ;CHECK-LABEL: func2x6(
 ;CHECK: <4 x i32>
 ;CHECK: ret
-define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
+define i32 @func2x6(ptr nocapture %out, ptr nocapture %out2, ptr nocapture %A, ptr nocapture %B, ptr nocapture %C, ptr nocapture %D, ptr nocapture %E, ptr nocapture %F) {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %i.037 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.037
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %B, i64 %i.037
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.037
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %B, i64 %i.037
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %i.037
-  %2 = load i32, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %C, i64 %i.037
+  %2 = load i32, ptr %arrayidx2, align 4
   %add3 = add nsw i32 %add, %2
-  %arrayidx4 = getelementptr inbounds i32, i32* %E, i64 %i.037
-  %3 = load i32, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %E, i64 %i.037
+  %3 = load i32, ptr %arrayidx4, align 4
   %add5 = add nsw i32 %add3, %3
-  %arrayidx6 = getelementptr inbounds i32, i32* %F, i64 %i.037
-  %4 = load i32, i32* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %F, i64 %i.037
+  %4 = load i32, ptr %arrayidx6, align 4
   %add7 = add nsw i32 %add5, %4
-  %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %i.037
-  store i32 %add7, i32* %arrayidx8, align 4
-  %5 = load i32, i32* %arrayidx, align 4
-  %6 = load i32, i32* %arrayidx1, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %out, i64 %i.037
+  store i32 %add7, ptr %arrayidx8, align 4
+  %5 = load i32, ptr %arrayidx, align 4
+  %6 = load i32, ptr %arrayidx1, align 4
   %add11 = add nsw i32 %6, %5
-  %7 = load i32, i32* %arrayidx2, align 4
+  %7 = load i32, ptr %arrayidx2, align 4
   %add13 = add nsw i32 %add11, %7
-  %8 = load i32, i32* %arrayidx4, align 4
+  %8 = load i32, ptr %arrayidx4, align 4
   %add15 = add nsw i32 %add13, %8
-  %9 = load i32, i32* %arrayidx6, align 4
+  %9 = load i32, ptr %arrayidx6, align 4
   %add17 = add nsw i32 %add15, %9
-  %arrayidx18 = getelementptr inbounds i32, i32* %out2, i64 %i.037
-  store i32 %add17, i32* %arrayidx18, align 4
+  %arrayidx18 = getelementptr inbounds i32, ptr %out2, i64 %i.037
+  store i32 %add17, ptr %arrayidx18, align 4
   %inc = add i64 %i.037, 1
   %exitcond = icmp eq i64 %inc, 256
   br i1 %exitcond, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll b/llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll
index a08e7ca281aa0..1b8e075fb4e4e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll
@@ -8,7 +8,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define i32 @no_vec(i32 %LastIndex, i16* nocapture readonly %InputData, i16 signext %lag, i16 signext %Scale) {
+define i32 @no_vec(i32 %LastIndex, ptr nocapture readonly %InputData, i16 signext %lag, i16 signext %Scale) {
 entry:
 ; MSG: LV: Selecting VF: 1.
   %cmp17 = icmp sgt i32 %LastIndex, 0
@@ -32,12 +32,12 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %Accumulator.018 = phi i64 [ 0, %for.body.lr.ph ], [ %add7, %for.body ]
-  %arrayidx = getelementptr inbounds i16, i16* %InputData, i64 %indvars.iv
-  %1 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %InputData, i64 %indvars.iv
+  %1 = load i16, ptr %arrayidx, align 2
   %conv = sext i16 %1 to i64
   %2 = add nsw i64 %indvars.iv, %0
-  %arrayidx3 = getelementptr inbounds i16, i16* %InputData, i64 %2
-  %3 = load i16, i16* %arrayidx3, align 2
+  %arrayidx3 = getelementptr inbounds i16, ptr %InputData, i64 %2
+  %3 = load i16, ptr %arrayidx3, align 2
   %conv4 = sext i16 %3 to i64
   %mul = mul nsw i64 %conv4, %conv
   %shr = ashr i64 %mul, %sh_prom

diff --git a/llvm/test/Transforms/LoopVectorize/X86/struct-store.ll b/llvm/test/Transforms/LoopVectorize/X86/struct-store.ll
index 6c6975b433971..2d2221234a353 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/struct-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/struct-store.ll
@@ -15,8 +15,8 @@ entry:
 
 loop:
   %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ]
-  %tmp = getelementptr inbounds [16 x { i64, i64 }], [16 x { i64, i64 }]* @glbl, i64 0, i64 %indvars.iv
-  store { i64, i64 } { i64 ptrtoint (void ()* @fn to i64), i64 0 }, { i64, i64 }* %tmp, align 16
+  %tmp = getelementptr inbounds [16 x { i64, i64 }], ptr @glbl, i64 0, i64 %indvars.iv
+  store { i64, i64 } { i64 ptrtoint (ptr @fn to i64), i64 0 }, ptr %tmp, align 16
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 16

diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
index b87c8ee04da20..5640afc52d3ad 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
@@ -12,7 +12,7 @@ declare float @__expf_finite(float) #0
 ; CHECK-LABEL: @exp_f32
 ; CHECK: <4 x float> @__svml_expf4
 ; CHECK: ret
-define void @exp_f32(float* nocapture %varray) {
+define void @exp_f32(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -21,8 +21,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @__expf_finite(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
@@ -41,7 +41,7 @@ declare double @__exp_finite(double) #0
 ; CHECK-LABEL: @exp_f64
 ; CHECK: <4 x double> @__svml_exp4
 ; CHECK: ret
-define void @exp_f64(double* nocapture %varray) {
+define void @exp_f64(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -50,8 +50,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call fast double @__exp_finite(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
+  store double %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !11
@@ -72,7 +72,7 @@ declare float @__logf_finite(float) #0
 ; CHECK-LABEL: @log_f32
 ; CHECK: <4 x float> @__svml_logf4
 ; CHECK: ret
-define void @log_f32(float* nocapture %varray) {
+define void @log_f32(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -81,8 +81,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @__logf_finite(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
@@ -101,7 +101,7 @@ declare double @__log_finite(double) #0
 ; CHECK-LABEL: @log_f64
 ; CHECK: <4 x double> @__svml_log4
 ; CHECK: ret
-define void @log_f64(double* nocapture %varray) {
+define void @log_f64(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -110,8 +110,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call fast double @__log_finite(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
+  store double %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
@@ -130,7 +130,7 @@ declare float @__powf_finite(float, float) #0
 ; CHECK-LABEL: @pow_f32
 ; CHECK: <4 x float> @__svml_powf4
 ; CHECK: ret
-define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
+define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
 entry:
   br label %for.body
 
@@ -138,11 +138,11 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
-  %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv
-  %tmp1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %indvars.iv
+  %tmp1 = load float, ptr %arrayidx, align 4
   %tmp2 = tail call fast float @__powf_finite(float %conv, float %tmp1)
-  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %tmp2, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %tmp2, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41
@@ -161,7 +161,7 @@ declare double @__pow_finite(double, double) #0
 ; CHECK-LABEL: @pow_f64
 ; CHECK: <4 x double> @__svml_pow4
 ; CHECK: ret
-define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
+define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
 entry:
   br label %for.body
 
@@ -169,11 +169,11 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to double
-  %arrayidx = getelementptr inbounds double, double* %exp, i64 %indvars.iv
-  %tmp1 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %indvars.iv
+  %tmp1 = load double, ptr %arrayidx, align 4
   %tmp2 = tail call fast double @__pow_finite(double %conv, double %tmp1)
-  %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %indvars.iv
-  store double %tmp2, double* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
+  store double %tmp2, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51
@@ -188,7 +188,7 @@ for.end:                                          ; preds = %for.body
 
 declare float @__exp2f_finite(float) #0
 
-define void @exp2f_finite(float* nocapture %varray) {
+define void @exp2f_finite(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2f_finite(
 ; CHECK:    call <4 x float> @__svml_exp2f4(<4 x float> %{{.*}})
 ; CHECK:    ret void
@@ -201,8 +201,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @__exp2f_finite(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !61
@@ -217,7 +217,7 @@ for.end:
 
 declare double @__exp2_finite(double) #0
 
-define void @exp2_finite(double* nocapture %varray) {
+define void @exp2_finite(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_finite(
 ; CHECK:    call <4 x double> @__svml_exp24(<4 x double> {{.*}})
 ; CHECK:    ret void
@@ -230,8 +230,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @__exp2_finite(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71
@@ -249,7 +249,7 @@ declare float @__log2f_finite(float) #0
 ; CHECK-LABEL: @log2_f32
 ; CHECK: <4 x float> @__svml_log2f4
 ; CHECK: ret
-define void @log2_f32(float* nocapture %varray) {
+define void @log2_f32(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -258,8 +258,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @__log2f_finite(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
@@ -278,7 +278,7 @@ declare double @__log2_finite(double) #0
 ; CHECK-LABEL: @log2_f64
 ; CHECK: <4 x double> @__svml_log24
 ; CHECK: ret
-define void @log2_f64(double* nocapture %varray) {
+define void @log2_f64(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -287,8 +287,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call fast double @__log2_finite(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
+  store double %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
@@ -306,7 +306,7 @@ declare float @__log10f_finite(float) #0
 ; CHECK-LABEL: @log10_f32
 ; CHECK: <4 x float> @__svml_log10f4
 ; CHECK: ret
-define void @log10_f32(float* nocapture %varray) {
+define void @log10_f32(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -315,8 +315,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @__log10f_finite(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
@@ -335,7 +335,7 @@ declare double @__log10_finite(double) #0
 ; CHECK-LABEL: @log10_f64
 ; CHECK: <4 x double> @__svml_log104
 ; CHECK: ret
-define void @log10_f64(double* nocapture %varray) {
+define void @log10_f64(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -344,8 +344,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call fast double @__log10_finite(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
+  store double %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
@@ -363,7 +363,7 @@ declare float @__sqrtf_finite(float) #0
 ; CHECK-LABEL: @sqrt_f32
 ; CHECK: <4 x float> @__svml_sqrtf4
 ; CHECK: ret
-define void @sqrt_f32(float* nocapture %varray) {
+define void @sqrt_f32(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -372,8 +372,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call fast float @__sqrtf_finite(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
@@ -392,7 +392,7 @@ declare double @__sqrt_finite(double) #0
 ; CHECK-LABEL: @sqrt_f64
 ; CHECK: <4 x double> @__svml_sqrt4
 ; CHECK: ret
-define void @sqrt_f64(double* nocapture %varray) {
+define void @sqrt_f64(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -401,8 +401,8 @@ for.body:                                         ; preds = %for.body, %entry
   %tmp = trunc i64 %indvars.iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call fast double @__sqrt_finite(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
+  store double %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31

diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
index f843fc1132337..005557d7445ca 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
@@ -46,7 +46,7 @@ declare float @exp2f(float) #0
 declare double @llvm.exp2.f64(double) #0
 declare float @llvm.exp2.f32(float) #0
 
-define void @sin_f64(double* nocapture %varray) {
+define void @sin_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f64(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -59,8 +59,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @sin(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -69,7 +69,7 @@ for.end:
   ret void
 }
 
-define void @sin_f32(float* nocapture %varray) {
+define void @sin_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -82,8 +82,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @sinf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -92,7 +92,7 @@ for.end:
   ret void
 }
 
-define void @sin_f64_intrinsic(double* nocapture %varray) {
+define void @sin_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f64_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -105,8 +105,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.sin.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -115,7 +115,7 @@ for.end:
   ret void
 }
 
-define void @sin_f32_intrinsic(float* nocapture %varray) {
+define void @sin_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -128,8 +128,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.sin.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -138,7 +138,7 @@ for.end:
   ret void
 }
 
-define void @cos_f64(double* nocapture %varray) {
+define void @cos_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f64(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -151,8 +151,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @cos(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -161,7 +161,7 @@ for.end:
   ret void
 }
 
-define void @cos_f32(float* nocapture %varray) {
+define void @cos_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -174,8 +174,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @cosf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -184,7 +184,7 @@ for.end:
   ret void
 }
 
-define void @cos_f64_intrinsic(double* nocapture %varray) {
+define void @cos_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f64_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -197,8 +197,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.cos.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -207,7 +207,7 @@ for.end:
   ret void
 }
 
-define void @cos_f32_intrinsic(float* nocapture %varray) {
+define void @cos_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -220,8 +220,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.cos.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -230,7 +230,7 @@ for.end:
   ret void
 }
 
-define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
+define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f64(
 ; CHECK:    [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
 ; CHECK:    ret void
@@ -242,11 +242,11 @@ for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
-  %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv
-  %tmp1 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
+  %tmp1 = load double, ptr %arrayidx, align 4
   %tmp2 = tail call double @pow(double %conv, double %tmp1)
-  %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %tmp2, double* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %tmp2, ptr %arrayidx2, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -255,7 +255,7 @@ for.end:
   ret void
 }
 
-define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) {
+define void @pow_f64_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f64_intrinsic(
 ; CHECK:    [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
 ; CHECK:    ret void
@@ -267,11 +267,11 @@ for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
-  %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv
-  %tmp1 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
+  %tmp1 = load double, ptr %arrayidx, align 4
   %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1)
-  %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %tmp2, double* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %tmp2, ptr %arrayidx2, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -280,7 +280,7 @@ for.end:
   ret void
 }
 
-define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
+define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32(
 ; CHECK:    [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
 ; CHECK:    ret void
@@ -292,11 +292,11 @@ for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
-  %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv
-  %tmp1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
+  %tmp1 = load float, ptr %arrayidx, align 4
   %tmp2 = tail call float @powf(float %conv, float %tmp1)
-  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %tmp2, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %tmp2, ptr %arrayidx2, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -305,7 +305,7 @@ for.end:
   ret void
 }
 
-define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) {
+define void @pow_f32_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32_intrinsic(
 ; CHECK:    [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
 ; CHECK:    ret void
@@ -317,11 +317,11 @@ for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
-  %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv
-  %tmp1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
+  %tmp1 = load float, ptr %arrayidx, align 4
   %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1)
-  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %tmp2, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %tmp2, ptr %arrayidx2, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -330,7 +330,7 @@ for.end:
   ret void
 }
 
-define void @exp_f64(double* nocapture %varray) {
+define void @exp_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f64(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -343,8 +343,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @exp(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -353,7 +353,7 @@ for.end:
   ret void
 }
 
-define void @exp_f32(float* nocapture %varray) {
+define void @exp_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -366,8 +366,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @expf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -376,7 +376,7 @@ for.end:
   ret void
 }
 
-define void @exp_f64_intrinsic(double* nocapture %varray) {
+define void @exp_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f64_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -389,8 +389,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.exp.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -399,7 +399,7 @@ for.end:
   ret void
 }
 
-define void @exp_f32_intrinsic(float* nocapture %varray) {
+define void @exp_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -412,8 +412,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.exp.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -422,7 +422,7 @@ for.end:
   ret void
 }
 
-define void @log_f64(double* nocapture %varray) {
+define void @log_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f64(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -435,8 +435,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @log(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -445,7 +445,7 @@ for.end:
   ret void
 }
 
-define void @log_f32(float* nocapture %varray) {
+define void @log_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -458,8 +458,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @logf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -468,7 +468,7 @@ for.end:
   ret void
 }
 
-define void @log_f64_intrinsic(double* nocapture %varray) {
+define void @log_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f64_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -481,8 +481,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.log.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -491,7 +491,7 @@ for.end:
   ret void
 }
 
-define void @log_f32_intrinsic(float* nocapture %varray) {
+define void @log_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -504,8 +504,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.log.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -514,7 +514,7 @@ for.end:
   ret void
 }
 
-define void @log2_f64(double* nocapture %varray) {
+define void @log2_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f64(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -527,8 +527,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @log2(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -537,7 +537,7 @@ for.end:
   ret void
 }
 
-define void @log2_f32(float* nocapture %varray) {
+define void @log2_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f32(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -550,8 +550,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @log2f(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -560,7 +560,7 @@ for.end:
   ret void
 }
 
-define void @log2_f64_intrinsic(double* nocapture %varray) {
+define void @log2_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f64_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -573,8 +573,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.log2.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -583,7 +583,7 @@ for.end:
   ret void
 }
 
-define void @log2_f32_intrinsic(float* nocapture %varray) {
+define void @log2_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f32_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -596,8 +596,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.log2.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -606,7 +606,7 @@ for.end:
   ret void
 }
 
-define void @log10_f64(double* nocapture %varray) {
+define void @log10_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f64(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -619,8 +619,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @log10(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -629,7 +629,7 @@ for.end:
   ret void
 }
 
-define void @log10_f32(float* nocapture %varray) {
+define void @log10_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f32(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -642,8 +642,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @log10f(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -652,7 +652,7 @@ for.end:
   ret void
 }
 
-define void @log10_f64_intrinsic(double* nocapture %varray) {
+define void @log10_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f64_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -665,8 +665,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.log10.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -675,7 +675,7 @@ for.end:
   ret void
 }
 
-define void @log10_f32_intrinsic(float* nocapture %varray) {
+define void @log10_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f32_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -688,8 +688,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.log10.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -698,7 +698,7 @@ for.end:
   ret void
 }
 
-define void @sqrt_f64(double* nocapture %varray) {
+define void @sqrt_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @sqrt_f64(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -711,8 +711,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @sqrt(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -721,7 +721,7 @@ for.end:
   ret void
 }
 
-define void @sqrt_f32(float* nocapture %varray) {
+define void @sqrt_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @sqrt_f32(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -734,8 +734,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @sqrtf(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -744,7 +744,7 @@ for.end:
   ret void
 }
 
-define void @exp2_f64(double* nocapture %varray) {
+define void @exp2_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f64(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -757,8 +757,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @exp2(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -767,7 +767,7 @@ for.end:
   ret void
 }
 
-define void @exp2_f32(float* nocapture %varray) {
+define void @exp2_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f32(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -780,8 +780,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @exp2f(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -790,7 +790,7 @@ for.end:
   ret void
 }
 
-define void @exp2_f64_intrinsic(double* nocapture %varray) {
+define void @exp2_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f64_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -803,8 +803,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to double
   %call = tail call double @llvm.exp2.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body
@@ -813,7 +813,7 @@ for.end:
   ret void
 }
 
-define void @exp2_f32_intrinsic(float* nocapture %varray) {
+define void @exp2_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f32_intrinsic(
 ; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
 ; CHECK:    ret void
@@ -826,8 +826,8 @@ for.body:
   %tmp = trunc i64 %iv to i32
   %conv = sitofp i32 %tmp to float
   %call = tail call float @llvm.exp2.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
   br i1 %exitcond, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll
index 982f5becb1fae..b98a2ea54ffe1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll
@@ -21,7 +21,7 @@ target triple = "x86_64-pc-linux-gnu"
 ;CHECK: call void @llvm.masked.store
 
 ; Function Attrs: nofree norecurse nounwind uwtable
-define dso_local void @fold_tail(i32* noalias nocapture %p, i32* noalias nocapture readonly %q1, i32* noalias nocapture readonly %q2,
+define dso_local void @fold_tail(ptr noalias nocapture %p, ptr noalias nocapture readonly %q1, ptr noalias nocapture readonly %q2,
 i32 %guard) local_unnamed_addr #0 {
 entry:
   %0 = sext i32 %guard to i64
@@ -36,13 +36,13 @@ for.body:
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:
-  %arrayidx = getelementptr inbounds i32, i32* %q1, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx, align 4, !tbaa !2
-  %arrayidx3 = getelementptr inbounds i32, i32* %q2, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx3, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds i32, ptr %q1, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx, align 4, !tbaa !2
+  %arrayidx3 = getelementptr inbounds i32, ptr %q2, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx3, align 4, !tbaa !2
   %add = add nsw i32 %2, %1
-  %arrayidx5 = getelementptr inbounds i32, i32* %p, i64 %indvars.iv
-  store i32 %add, i32* %arrayidx5, align 4, !tbaa !2
+  %arrayidx5 = getelementptr inbounds i32, ptr %p, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx5, align 4, !tbaa !2
   br label %for.inc
 
 for.inc:
@@ -67,7 +67,7 @@ for.inc:
 ;CHECK:  call void @llvm.masked.store
 
 ; Function Attrs: norecurse nounwind uwtable
-define void @assume_safety(i32* nocapture, i32* nocapture readonly, i32* nocapture readonly, i32) local_unnamed_addr #0 {
+define void @assume_safety(ptr nocapture, ptr nocapture readonly, ptr nocapture readonly, i32) local_unnamed_addr #0 {
   %5 = sext i32 %3 to i64
   br label %7
 
@@ -80,13 +80,13 @@ define void @assume_safety(i32* nocapture, i32* nocapture readonly, i32* nocaptu
   br i1 %9, label %10, label %17
 
 ; <label>:10:
-  %11 = getelementptr inbounds i32, i32* %1, i64 %8
-  %12 = load i32, i32* %11, align 4, !tbaa !2, !llvm.mem.parallel_loop_access !6
-  %13 = getelementptr inbounds i32, i32* %2, i64 %8
-  %14 = load i32, i32* %13, align 4, !tbaa !2, !llvm.mem.parallel_loop_access !6
+  %11 = getelementptr inbounds i32, ptr %1, i64 %8
+  %12 = load i32, ptr %11, align 4, !tbaa !2, !llvm.mem.parallel_loop_access !6
+  %13 = getelementptr inbounds i32, ptr %2, i64 %8
+  %14 = load i32, ptr %13, align 4, !tbaa !2, !llvm.mem.parallel_loop_access !6
   %15 = add nsw i32 %14, %12
-  %16 = getelementptr inbounds i32, i32* %0, i64 %8
-  store i32 %15, i32* %16, align 4, !tbaa !2, !llvm.mem.parallel_loop_access !6
+  %16 = getelementptr inbounds i32, ptr %0, i64 %8
+  store i32 %15, ptr %16, align 4, !tbaa !2, !llvm.mem.parallel_loop_access !6
   br label %17
 
 ; <label>:17:
@@ -113,7 +113,7 @@ define void @assume_safety(i32* nocapture, i32* nocapture readonly, i32* nocaptu
 ;CHECK: call void @llvm.masked.store
 
 ; Function Attrs: nofree norecurse nounwind uwtable
-define dso_local void @fold_tail_and_assume_safety(i32* noalias nocapture %p, i32* noalias nocapture readonly %q1, i32* noalias nocapture readonly %q2,
+define dso_local void @fold_tail_and_assume_safety(ptr noalias nocapture %p, ptr noalias nocapture readonly %q1, ptr noalias nocapture readonly %q2,
 i32 %guard) local_unnamed_addr #0 {
 entry:
   %0 = sext i32 %guard to i64
@@ -128,13 +128,13 @@ for.body:
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:
-  %arrayidx = getelementptr inbounds i32, i32* %q1, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx, align 4, !tbaa !2, !llvm.access.group !10
-  %arrayidx3 = getelementptr inbounds i32, i32* %q2, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx3, align 4, !tbaa !2, !llvm.access.group !10
+  %arrayidx = getelementptr inbounds i32, ptr %q1, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx, align 4, !tbaa !2, !llvm.access.group !10
+  %arrayidx3 = getelementptr inbounds i32, ptr %q2, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx3, align 4, !tbaa !2, !llvm.access.group !10
   %add = add nsw i32 %2, %1
-  %arrayidx5 = getelementptr inbounds i32, i32* %p, i64 %indvars.iv
-  store i32 %add, i32* %arrayidx5, align 4, !tbaa !2, !llvm.access.group !10
+  %arrayidx5 = getelementptr inbounds i32, ptr %p, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx5, align 4, !tbaa !2, !llvm.access.group !10
   br label %for.inc
 
 for.inc:

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/tripcount.ll
index 90e8a3fc22a5a..a788306470ab0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/tripcount.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/tripcount.ll
@@ -22,10 +22,10 @@ for.body.preheader:
 
 for.body:
   %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @big, i32 0, i32 %i.07
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [0 x i32], ptr @big, i32 0, i32 %i.07
+  %0 = load i32, ptr %arrayidx, align 4
   %neg = xor i32 %0, -1
-  store i32 %neg, i32* %arrayidx, align 4
+  store i32 %neg, ptr %arrayidx, align 4
   %inc = add nsw i32 %i.07, 1
   %conv = sext i32 %inc to i64
   %cmp = icmp slt i64 %conv, %count

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
index 8863477b72c11..ef0402aaae802 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
@@ -8,16 +8,16 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; CHECK: cost of 4 for VF 1 For instruction:   %conv = uitofp i64 %tmp to double
 ; CHECK: cost of 5 for VF 2 For instruction:   %conv = uitofp i64 %tmp to double
 ; CHECK: cost of 10 for VF 4 For instruction:   %conv = uitofp i64 %tmp to double
-define void @uint64_to_double_cost(i64* noalias nocapture %a, double* noalias nocapture readonly %b) nounwind {
+define void @uint64_to_double_cost(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) nounwind {
 entry:
   br label %for.body
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
-  %tmp = load i64, i64* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  %tmp = load i64, ptr %arrayidx, align 4
   %conv = uitofp i64 %tmp to double
-  %arrayidx2 = getelementptr inbounds double, double* %b, i64 %indvars.iv
-  store double %conv, double* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds double, ptr %b, i64 %indvars.iv
+  store double %conv, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 256
   br i1 %exitcond, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll
index 96f92c9e84e27..f0641154f85c7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll
@@ -8,17 +8,17 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-DAG: LV: Found uniform instruction:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; CHECK-DAG: LV: Found uniform instruction:   %exitcond = icmp eq i64 %indvars.iv, 1599
 
-define void @test(float* noalias nocapture %a, float* noalias nocapture readonly %b) #0 {
+define void @test(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %tmp0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %tmp0 = load float, ptr %arrayidx, align 4
   %add = fadd float %tmp0, 1.000000e+00
-  %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv, 1599
   br i1 %exitcond, label %for.end, label %for.body
@@ -29,18 +29,18 @@ for.end:                                          ; preds = %for.body
 
 ; CHECK-LABEL: foo
 ; CHECK-DAG: LV: Found uniform instruction:   %cond = icmp eq i64 %i.next, %n
-; CHECK-DAG: LV: Found uniform instruction:   %tmp1 = getelementptr inbounds i32, i32* %a, i32 %tmp0
+; CHECK-DAG: LV: Found uniform instruction:   %tmp1 = getelementptr inbounds i32, ptr %a, i32 %tmp0
 ; CHECK-NOT: LV: Found uniform instruction:   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
 
-define void @foo(i32* %a, i64 %n) {
+define void @foo(ptr %a, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
   %tmp0 = trunc i64 %i to i32
-  %tmp1 = getelementptr inbounds i32, i32* %a, i32 %tmp0
-  store i32 %tmp0, i32* %tmp1, align 4
+  %tmp1 = getelementptr inbounds i32, ptr %a, i32 %tmp0
+  store i32 %tmp0, ptr %tmp1, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp eq i64 %i.next, %n
   br i1 %cond, label %for.end, label %for.body
@@ -55,17 +55,17 @@ for.end:
 ; CHECK-DAG: LV: Found uniform instruction:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; CHECK-DAG: LV: Found uniform instruction:   %exitcond = icmp eq i64 %indvars.iv, 1599
 
-define i64 @goo(float* noalias nocapture %a, float* noalias nocapture readonly %b) #0 {
+define i64 @goo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %tmp0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %tmp0 = load float, ptr %arrayidx, align 4
   %add = fadd float %tmp0, 1.000000e+00
-  %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv, 1599
   br i1 %exitcond, label %for.end, label %for.body
@@ -78,7 +78,7 @@ for.end:                                          ; preds = %for.body
 ; CHECK-LABEL: PR38786
 ; Check that first order recurrence phis (%phi32 and %phi64) are not uniform.
 ; CHECK-NOT: LV: Found uniform instruction:   %phi
-define void @PR38786(double* %y, double* %x, i64 %n) {
+define void @PR38786(ptr %y, ptr %x, i64 %n) {
 entry:
   br label %for.body
 
@@ -87,10 +87,10 @@ for.body:
   %phi64 = phi i64 [ 0, %entry ], [ %i64next, %for.body ]
   %i32next = add i32 %phi32, 1
   %i64next = zext i32 %i32next to i64
-  %xip = getelementptr inbounds double, double* %x, i64 %i64next
-  %yip = getelementptr inbounds double, double* %y, i64 %phi64
-  %xi = load double, double* %xip, align 8
-  store double %xi, double* %yip, align 8
+  %xip = getelementptr inbounds double, ptr %x, i64 %i64next
+  %yip = getelementptr inbounds double, ptr %y, i64 %phi64
+  %xi = load double, ptr %xip, align 8
+  store double %xi, ptr %yip, align 8
   %cmp = icmp slt i64 %i64next, %n
   br i1 %cmp, label %for.body, label %for.end
 

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
index 45812ff41187e..7b2e65d1b02e3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -passes=loop-vectorize -S -mcpu=core-avx2 < %s | FileCheck %s
 
 ;float inc = 0.5;
-;void foo(float *A, unsigned N) {
+;void foo(ptr A, unsigned N) {
 ;
 ;  for (unsigned i=0; i<N; i++){
 ;    A[i] += inc;
@@ -19,7 +19,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @inc = global float 5.000000e-01, align 4
 
-define void @foo(float* nocapture %A, i32 %N) #0 {
+define void @foo(ptr nocapture %A, i32 %N) #0 {
 entry:
   %cmp3 = icmp eq i32 %N, 0
   br i1 %cmp3, label %for.end, label %for.body.preheader
@@ -29,11 +29,11 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %0 = load float, float* @inc, align 4
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %1 = load float, float* %arrayidx, align 4
+  %0 = load float, ptr @inc, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx, align 4
   %add = fadd float %0, %1
-  store float %add, float* %arrayidx, align 4
+  store float %add, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %N

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
index cac4757ccc20b..d56740c8293c0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
@@ -5,16 +5,16 @@
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %shift = ashr i32 %val, %k
 ; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %shift = ashr i32 %val, %k
 ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %shift = ashr i32 %val, %k
-define void @foo(i32* nocapture %p, i32 %k) local_unnamed_addr #0 {
+define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr #0 {
 entry:
   br label %body
 
 body:
   %i = phi i64 [ 0, %entry ], [ %next, %body ]
-  %ptr = getelementptr inbounds i32, i32* %p, i64 %i
-  %val = load i32, i32* %ptr, align 4
+  %ptr = getelementptr inbounds i32, ptr %p, i64 %i
+  %val = load i32, ptr %ptr, align 4
   %shift = ashr i32 %val, %k
-  store i32 %shift, i32* %ptr, align 4
+  store i32 %shift, ptr %ptr, align 4
   %next = add nuw nsw i64 %i, 1
   %cmp = icmp eq i64 %next, 16
   br i1 %cmp, label %exit, label %body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll b/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll
index fc1c1540177cd..8971dfe507240 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll
@@ -11,16 +11,16 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;CHECK-NOUNRL: store <4 x i32>
 ;CHECK-NOUNRL-NOT: store <4 x i32>
 ;CHECK-NOUNRL: ret
-define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
+define i32 @bar(ptr nocapture %A, i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
   %4 = add nsw i32 %3, 6
-  store i32 %4, i32* %2, align 4
+  store i32 %4, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
index d8cec93afe2b6..290be569bc125 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -16,20 +16,20 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; CHECK-VECTOR: ret
 ;
 ; CHECK-SCALAR-LABEL: @foo(
-; CHECK-SCALAR: load i32, i32*
-; CHECK-SCALAR-NOT: load i32, i32*
+; CHECK-SCALAR: load i32, ptr
+; CHECK-SCALAR-NOT: load i32, ptr
 ; CHECK-SCALAR: store i32
 ; CHECK-SCALAR-NOT: store i32
 ; CHECK-SCALAR: ret
-define i32 @foo(i32* nocapture %A) nounwind uwtable ssp {
+define i32 @foo(ptr nocapture %A) nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
   %4 = add nsw i32 %3, 6
-  store i32 %4, i32* %2, align 4
+  store i32 %4, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 100
@@ -53,16 +53,16 @@ define i32 @foo(i32* nocapture %A) nounwind uwtable ssp {
 ; CHECK-SCALAR: store i32
 ; CHECK-SCALAR-NOT: store i32
 ; CHECK-SCALAR: ret
-define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
+define i32 @bar(ptr nocapture %A, i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
   %4 = add nsw i32 %3, 6
-  store i32 %4, i32* %2, align 4
+  store i32 %4, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -82,17 +82,17 @@ define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
 ; CHECK-SCALAR-LABEL: @runtime_chk(
 ; CHECK-SCALAR: store float
 ; CHECK-SCALAR-NOT: store float
-define void @runtime_chk(float* %A, float* %B, float %N) {
+define void @runtime_chk(ptr %A, ptr %B, float %N) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %mul = fmul float %0, %N
-  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  store float %mul, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  store float %mul, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 256
   br i1 %exitcond, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/llvm/test/Transforms/LoopVectorize/X86/unroll_selection.ll
index 3302bdb55e797..27dcac6c74e10 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/unroll_selection.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/unroll_selection.ll
@@ -10,14 +10,14 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;CHECK: store <4 x double>
 ;CHECK-NOT: store <4 x double>
 ;CHECK: ret
-define void @reg_pressure(double* nocapture %A, i32 %n) nounwind uwtable ssp {
+define void @reg_pressure(ptr nocapture %A, i32 %n) nounwind uwtable ssp {
   %1 = sext i32 %n to i64
   br label %2
 
 ; <label>:2                                       ; preds = %2, %0
   %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
-  %3 = getelementptr inbounds double, double* %A, i64 %indvars.iv
-  %4 = load double, double* %3, align 8
+  %3 = getelementptr inbounds double, ptr %A, i64 %indvars.iv
+  %4 = load double, ptr %3, align 8
   %5 = fadd double %4, 3.000000e+00
   %6 = fmul double %4, 2.000000e+00
   %7 = fadd double %5, %6
@@ -37,7 +37,7 @@ define void @reg_pressure(double* nocapture %A, i32 %n) nounwind uwtable ssp {
   %21 = fadd double %20, %17
   %22 = fadd double %21, 3.000000e+00
   %23 = fmul double %4, %22
-  store double %23, double* %3, align 8
+  store double %23, ptr %3, align 8
   %indvars.iv.next = add i64 %indvars.iv, -1
   %24 = trunc i64 %indvars.iv to i32
   %25 = icmp eq i32 %24, 0
@@ -52,16 +52,16 @@ define void @reg_pressure(double* nocapture %A, i32 %n) nounwind uwtable ssp {
 ;CHECK: xor
 ;CHECK: xor
 ;CHECK: ret
-define void @small_loop(i16* nocapture %A, i64 %n) nounwind uwtable ssp {
+define void @small_loop(ptr nocapture %A, i64 %n) nounwind uwtable ssp {
   %1 = icmp eq i64 %n, 0
   br i1 %1, label %._crit_edge, label %.lr.ph
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %i.01 = phi i64 [ %5, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds i16, i16* %A, i64 %i.01
-  %3 = load i16, i16* %2, align 2
+  %2 = getelementptr inbounds i16, ptr %A, i64 %i.01
+  %3 = load i16, ptr %2, align 2
   %4 = xor i16 %3, 3
-  store i16 %4, i16* %2, align 2
+  store i16 %4, ptr %2, align 2
   %5 = add i64 %i.01, 1
   %exitcond = icmp eq i64 %5, %n
   br i1 %exitcond, label %._crit_edge, label %.lr.ph

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll
index 50c009cbef8b5..2e78a96a44b74 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll
@@ -7,18 +7,18 @@ target triple = "x86_64-unknown-linux-gnu"
 ;CHECK: vsqrtf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @sqrtf(float) nounwind readnone
-define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @sqrt_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @sqrtf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -32,18 +32,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vexpf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @expf(float) nounwind readnone
-define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @exp_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @expf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -57,18 +57,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vlogf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @logf(float) nounwind readnone
-define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @log_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @logf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -83,18 +83,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: fabs{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @fabsf(float) nounwind readnone
-define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @fabs_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @fabsf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -109,18 +109,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vexpf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @llvm.exp.f32(float) nounwind readnone
-define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @exp_f32_intrin(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -135,18 +135,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK-NOT: foo{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @foo(float) nounwind readnone
-define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @foo_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @foo(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -160,18 +160,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK-LABEL: @sqrt_f32_nobuiltin(
 ;CHECK-NOT: vsqrtf{{.*}}<4 x float>
 ;CHECK: ret void
-define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @sqrt_f32_nobuiltin(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @sqrtf(float %0) nounwind readnone nobuiltin
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -185,18 +185,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vceilf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @ceilf(float) nounwind readnone
-define void @ceil_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @ceil_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @ceilf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -210,18 +210,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vfloorf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @floorf(float) nounwind readnone
-define void @floor_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @floor_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @floorf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -235,18 +235,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vexpm1f{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @expm1f(float) nounwind readnone
-define void @expm1_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @expm1_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @expm1f(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -260,18 +260,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vlog1pf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @log1pf(float) nounwind readnone
-define void @log1p_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @log1p_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @log1pf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -285,18 +285,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vlog10f{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @log10f(float) nounwind readnone
-define void @log10_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @log10_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @log10f(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -310,18 +310,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vlogbf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @logbf(float) nounwind readnone
-define void @logb_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @logb_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @logbf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -335,18 +335,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vsinf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @sinf(float) nounwind readnone
-define void @sin_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @sin_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @sinf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -360,18 +360,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vcosf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @cosf(float) nounwind readnone
-define void @cos_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @cos_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @cosf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -385,18 +385,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vtanf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @tanf(float) nounwind readnone
-define void @tan_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @tan_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @tanf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -410,18 +410,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vasinf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @asinf(float) nounwind readnone
-define void @asin_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @asin_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @asinf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -435,18 +435,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vacosf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @acosf(float) nounwind readnone
-define void @acos_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @acos_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @acosf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -460,18 +460,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vatanf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @atanf(float) nounwind readnone
-define void @atan_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @atan_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @atanf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -485,18 +485,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vsinhf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @sinhf(float) nounwind readnone
-define void @sinh_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @sinh_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @sinhf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -510,18 +510,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vcoshf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @coshf(float) nounwind readnone
-define void @cosh_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @cosh_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @coshf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -535,18 +535,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vtanhf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @tanhf(float) nounwind readnone
-define void @tanh_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @tanh_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @tanhf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -560,18 +560,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vasinhf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @asinhf(float) nounwind readnone
-define void @asinh_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @asinh_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @asinhf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -585,18 +585,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vacoshf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @acoshf(float) nounwind readnone
-define void @acosh_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @acosh_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @acoshf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -610,18 +610,18 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: vatanhf{{.*}}<4 x float>
 ;CHECK: ret void
 declare float @atanhf(float) nounwind readnone
-define void @atanh_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @atanh_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @atanhf(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

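All of the hunks above and below apply the same mechanical rewrite: pointer operands lose their pointee type and become the opaque ptr type, while getelementptr, load and store keep spelling out the value type. A minimal before/after sketch of the pattern, written here with placeholder %src/%dst/%i names rather than lines taken from any one test:

  ; typed-pointer form (before)
  %gep = getelementptr inbounds float, float* %src, i64 %i
  %v = load float, float* %gep, align 4
  store float %v, float* %dst, align 4

  ; opaque-pointer form (after): only the pointer operand types change
  %gep = getelementptr inbounds float, ptr %src, i64 %i
  %v = load float, ptr %gep, align 4
  store float %v, ptr %dst, align 4
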
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
index 73f13c7cc15f9..4c0e07e912c84 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
@@ -15,7 +15,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; The source code for the test:
 ;
 ; #include <math.h>
-; void foo(float* restrict A, float * restrict B)
+; void foo(ptr restrict A, ptr restrict B)
 ; {
 ;   for (int i = 0; i < 1000; i+=2) A[i] = sinf(B[i]);
 ; }
@@ -25,17 +25,17 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; This loop will be vectorized, although the scalar cost is lower than any of vector costs, but vectorization is explicitly forced in metadata.
 ;
 
-define void @vectorized(float* noalias nocapture %A, float* noalias nocapture %B) {
+define void @vectorized(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4, !llvm.access.group !11
+  %arrayidx = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4, !llvm.access.group !11
   %call = tail call float @llvm.sin.f32(float %0)
-  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !llvm.access.group !11
+  %arrayidx2 = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4, !llvm.access.group !11
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1000
@@ -56,17 +56,17 @@ for.end:
 ; This method will not be vectorized, as scalar cost is lower than any of vector costs.
 ;
 
-define void @not_vectorized(float* noalias nocapture %A, float* noalias nocapture %B) {
+define void @not_vectorized(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4, !llvm.access.group !13
+  %arrayidx = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4, !llvm.access.group !13
   %call = tail call float @llvm.sin.f32(float %0)
-  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !llvm.access.group !13
+  %arrayidx2 = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4, !llvm.access.group !13
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1000

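The comments in vect.omp.force.ll above refer to vectorization being forced through loop metadata; the metadata itself sits outside the converted hunks. As a rough sketch of what such forcing metadata typically looks like (the metadata node numbers here are placeholders, not the ones used in the test), the backedge branch of the forced loop carries llvm.loop.vectorize.enable:

  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
  ...
  !0 = distinct !{!0, !1}
  !1 = !{!"llvm.loop.vectorize.enable", i1 true}
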
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
index 04b95e421561e..234587aae1283 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
@@ -15,19 +15,19 @@ define void @scalarselect(i1 %cond) {
 
 ; <label>:1
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %5, %3
-  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %7 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
 
 ; CHECK: cost of 1 for VF 1 {{.*}}  select i1 %cond, i32 %6, i32 0
 ; CHECK: cost of 2 for VF 2 {{.*}}  select i1 %cond, i32 %6, i32 0
 ; CHECK: cost of 2 for VF 4 {{.*}}  select i1 %cond, i32 %6, i32 0
 
   %sel = select i1 %cond, i32 %6, i32 zeroinitializer
-  store i32 %sel, i32* %7, align 4
+  store i32 %sel, ptr %7, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 256
@@ -43,12 +43,12 @@ define void @vectorselect(i1 %cond) {
 
 ; <label>:1
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %5, %3
-  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %7 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
   %8 = icmp ult i64 %indvars.iv, 8
 
 ; CHECK: cost of 1 for VF 1 {{.*}}  select i1 %8, i32 %6, i32 0
@@ -56,7 +56,7 @@ define void @vectorselect(i1 %cond) {
 ; CHECK: cost of 2 for VF 4 {{.*}}  select i1 %8, i32 %6, i32 0
 
   %sel = select i1 %8, i32 %6, i32 zeroinitializer
-  store i32 %sel, i32* %7, align 4
+  store i32 %sel, ptr %7, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 256

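The two functions in vector-scalar-select-cost.ll differ only in where the select condition comes from: @scalarselect uses the loop-invariant i1 argument %cond, while @vectorselect compares the induction variable on every iteration. Widened at VF 4, the two select shapes look roughly like the following (a hand-written sketch with placeholder %sum/%vec.iv values, not output captured from the vectorizer):

  ; uniform condition: one scalar i1 drives the whole vector select
  %sel.u = select i1 %cond, <4 x i32> %sum, <4 x i32> zeroinitializer

  ; per-lane condition: the compare is widened along with the select
  %mask = icmp ult <4 x i64> %vec.iv, <i64 8, i64 8, i64 8, i64 8>
  %sel.v = select <4 x i1> %mask, <4 x i32> %sum, <4 x i32> zeroinitializer
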
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll b/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
index e9149cfba6fd3..2a6b15bb1a74b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
@@ -28,20 +28,20 @@ for.cond.cleanup:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds [1000 x i8], [1000 x i8]* @b, i64 0, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx2 = getelementptr inbounds [1000 x i8], [1000 x i8]* @c, i64 0, i64 %indvars.iv
-  %1 = load i8, i8* %arrayidx2, align 1
+  %arrayidx = getelementptr inbounds [1000 x i8], ptr @b, i64 0, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1
+  %arrayidx2 = getelementptr inbounds [1000 x i8], ptr @c, i64 0, i64 %indvars.iv
+  %1 = load i8, ptr %arrayidx2, align 1
   %add = add i8 %1, %0
-  %arrayidx6 = getelementptr inbounds [1000 x i8], [1000 x i8]* @a, i64 0, i64 %indvars.iv
-  store i8 %add, i8* %arrayidx6, align 1
-  %arrayidx8 = getelementptr inbounds [1000 x i32], [1000 x i32]* @v, i64 0, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx8, align 4
-  %arrayidx10 = getelementptr inbounds [1000 x i32], [1000 x i32]* @w, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %arrayidx10, align 4
+  %arrayidx6 = getelementptr inbounds [1000 x i8], ptr @a, i64 0, i64 %indvars.iv
+  store i8 %add, ptr %arrayidx6, align 1
+  %arrayidx8 = getelementptr inbounds [1000 x i32], ptr @v, i64 0, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx8, align 4
+  %arrayidx10 = getelementptr inbounds [1000 x i32], ptr @w, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %arrayidx10, align 4
   %add11 = add nsw i32 %3, %2
-  %arrayidx13 = getelementptr inbounds [1000 x i32], [1000 x i32]* @u, i64 0, i64 %indvars.iv
-  store i32 %add11, i32* %arrayidx13, align 4
+  %arrayidx13 = getelementptr inbounds [1000 x i32], ptr @u, i64 0, i64 %indvars.iv
+  store i32 %add11, ptr %arrayidx13, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -49,7 +49,7 @@ for.body:
 
 ; We should not choose a VF larger than the constant TC.
 ; VF chosen should be atmost 16 (not the max possible vector width = 32 for AVX2)
-define void @not_too_small_tc(i8* noalias nocapture %A, i8* noalias nocapture readonly %B) {
+define void @not_too_small_tc(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
 ; CHECK-LABEL: not_too_small_tc
 ; CHECK-AVX2: LV: Selecting VF: 16.
 entry:
@@ -57,12 +57,12 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %B, i64 %indvars.iv
-  %l1 = load i8, i8* %arrayidx, align 4, !llvm.access.group !13
-  %arrayidx2 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv
-  %l2 = load i8, i8* %arrayidx2, align 4, !llvm.access.group !13
+  %arrayidx = getelementptr inbounds i8, ptr %B, i64 %indvars.iv
+  %l1 = load i8, ptr %arrayidx, align 4, !llvm.access.group !13
+  %arrayidx2 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv
+  %l2 = load i8, ptr %arrayidx2, align 4, !llvm.access.group !13
   %add = add i8 %l1, %l2
-  store i8 %add, i8* %arrayidx2, align 4, !llvm.access.group !13
+  store i8 %add, ptr %arrayidx2, align 4, !llvm.access.group !13
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 16
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
index 9f798ca4d49ca..5d110e7b40bff 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
@@ -4,10 +4,10 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-%0 = type { %0*, %1 }
-%1 = type { i8*, i32 }
+%0 = type { ptr, %1 }
+%1 = type { ptr, i32 }
 
-@p = global [2048 x [8 x i32*]] zeroinitializer, align 16
+@p = global [2048 x [8 x ptr]] zeroinitializer, align 16
 @q = global [2048 x i16] zeroinitializer, align 16
 @r = global [2048 x i16] zeroinitializer, align 16
 
@@ -19,19 +19,19 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; CHECK: test_consecutive_store
 ; CHECK: LV: The Smallest and Widest types: 64 / 64 bits.
 ; CHECK: LV: Selecting VF: 4
-define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 {
-  %4 = load %0*, %0** %2, align 8
-  %5 = icmp eq %0** %0, %1
+define void @test_consecutive_store(ptr, ptr, ptr nocapture) nounwind ssp uwtable align 2 {
+  %4 = load ptr, ptr %2, align 8
+  %5 = icmp eq ptr %0, %1
   br i1 %5, label %12, label %6
 
 ; <label>:6                                       ; preds = %3
   br label %7
 
 ; <label>:7                                       ; preds = %7, %6
-  %8 = phi %0** [ %0, %6 ], [ %9, %7 ]
-  store %0* %4, %0** %8, align 8
-  %9 = getelementptr inbounds %0*, %0** %8, i64 1
-  %10 = icmp eq %0** %9, %1
+  %8 = phi ptr [ %0, %6 ], [ %9, %7 ]
+  store ptr %4, ptr %8, align 8
+  %9 = getelementptr inbounds ptr, ptr %8, i64 1
+  %10 = icmp eq ptr %9, %1
   br i1 %10, label %11, label %7
 
 ; <label>:11                                      ; preds = %7
@@ -63,13 +63,13 @@ define void @test_nonconsecutive_store() nounwind ssp uwtable {
 
 ; <label>:3                                       ; preds = %3, %1
   %4 = phi i64 [ 0, %1 ], [ %11, %3 ]
-  %5 = getelementptr inbounds [2048 x i16], [2048 x i16]* @q, i64 0, i64 %4
-  %6 = load i16, i16* %5, align 2
+  %5 = getelementptr inbounds [2048 x i16], ptr @q, i64 0, i64 %4
+  %6 = load i16, ptr %5, align 2
   %7 = sext i16 %6 to i64
   %8 = add i64 %7, 1
-  %9 = inttoptr i64 %8 to i32*
-  %10 = getelementptr inbounds [2048 x [8 x i32*]], [2048 x [8 x i32*]]* @p, i64 0, i64 %4, i64 %2
-  store i32* %9, i32** %10, align 8
+  %9 = inttoptr i64 %8 to ptr
+  %10 = getelementptr inbounds [2048 x [8 x ptr]], ptr @p, i64 0, i64 %4, i64 %2
+  store ptr %9, ptr %10, align 8
   %11 = add i64 %4, 1
   %12 = trunc i64 %11 to i32
   %13 = icmp ne i32 %12, 1024
@@ -86,10 +86,10 @@ define void @test_nonconsecutive_store() nounwind ssp uwtable {
 }
 
 
-@ia = global [1024 x i32*] zeroinitializer, align 16
+@ia = global [1024 x ptr] zeroinitializer, align 16
 @ib = global [1024 x i32] zeroinitializer, align 16
 @ic = global [1024 x i8] zeroinitializer, align 16
-@p2 = global [2048 x [8 x i32*]] zeroinitializer, align 16
+@p2 = global [2048 x [8 x ptr]] zeroinitializer, align 16
 @q2 = global [2048 x i16] zeroinitializer, align 16
 
 ;; Now we check the same rules for loads. We should take consecutive loads of
@@ -103,9 +103,9 @@ define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable {
 ; <label>:1                                       ; preds = %1, %0
   %2 = phi i64 [ 0, %0 ], [ %10, %1 ]
   %3 = phi i8 [ 0, %0 ], [ %9, %1 ]
-  %4 = getelementptr inbounds [1024 x i32*], [1024 x i32*]* @ia, i32 0, i64 %2
-  %5 = load i32*, i32** %4, align 4
-  %6 = ptrtoint i32* %5 to i64
+  %4 = getelementptr inbounds [1024 x ptr], ptr @ia, i32 0, i64 %2
+  %5 = load ptr, ptr %4, align 4
+  %6 = ptrtoint ptr %5 to i64
   %7 = trunc i64 %6 to i8
   %8 = add i8 %3, 1
   %9 = add i8 %7, %8
@@ -131,12 +131,12 @@ define void @test_nonconsecutive_ptr_load() nounwind ssp uwtable {
 
 ; <label>:3                                       ; preds = %3, %1
   %4 = phi i64 [ 0, %1 ], [ %10, %3 ]
-  %5 = getelementptr inbounds [2048 x [8 x i32*]], [2048 x [8 x i32*]]* @p2, i64 0, i64 %4, i64 %2
-  %6 = getelementptr inbounds [2048 x i16], [2048 x i16]* @q2, i64 0, i64 %4
-  %7 = load i32*, i32** %5, align 2
-  %8 = ptrtoint i32* %7 to i64
+  %5 = getelementptr inbounds [2048 x [8 x ptr]], ptr @p2, i64 0, i64 %4, i64 %2
+  %6 = getelementptr inbounds [2048 x i16], ptr @q2, i64 0, i64 %4
+  %7 = load ptr, ptr %5, align 2
+  %8 = ptrtoint ptr %7 to i64
   %9 = trunc i64 %8 to i16
-  store i16 %9, i16* %6, align 8
+  store i16 %9, ptr %6, align 8
   %10 = add i64 %4, 1
   %11 = trunc i64 %10 to i32
   %12 = icmp ne i32 %11, 1024

diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
index 6429a3993c2c5..4dfbdf1adee2c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
@@ -17,17 +17,17 @@ entry:
   %diff = alloca i32, align 4
   %cb = alloca [16 x i8], align 16
   %cc = alloca [16 x i8], align 16
-  store i32 0, i32* %diff, align 4, !tbaa !11
+  store i32 0, ptr %diff, align 4, !tbaa !11
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %add8 = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds [16 x i8], [16 x i8]* %cb, i64 0, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1, !tbaa !21
+  %arrayidx = getelementptr inbounds [16 x i8], ptr %cb, i64 0, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1, !tbaa !21
   %conv = sext i8 %0 to i32
-  %arrayidx2 = getelementptr inbounds [16 x i8], [16 x i8]* %cc, i64 0, i64 %indvars.iv
-  %1 = load i8, i8* %arrayidx2, align 1, !tbaa !21
+  %arrayidx2 = getelementptr inbounds [16 x i8], ptr %cc, i64 0, i64 %indvars.iv
+  %1 = load i8, ptr %arrayidx2, align 1, !tbaa !21
   %conv3 = sext i8 %1 to i32
   %sub = sub i32 %conv, %conv3
   %add = add nsw i32 %sub, %add8
@@ -36,12 +36,12 @@ for.body:                                         ; preds = %for.body, %entry
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !25
 
 for.end:                                          ; preds = %for.body
-  store i32 %add, i32* %diff, align 4, !tbaa !11
-  call void @ibar(i32* %diff) #2
+  store i32 %add, ptr %diff, align 4, !tbaa !11
+  call void @ibar(ptr %diff) #2
   ret i32 0
 }
 
-declare void @ibar(i32*) #1
+declare void @ibar(ptr) #1
 
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}

diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 8673e58320bcb..de2fe124ea63b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -136,17 +136,17 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z4testPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
+define void @_Z4testPii(ptr nocapture %A, i32 %Length) #0 !dbg !4 {
 entry:
   %cmp10 = icmp sgt i32 %Length, 0, !dbg !12
   br i1 %cmp10, label %for.body, label %for.end, !dbg !12, !llvm.loop !14
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !16
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !16
   %0 = trunc i64 %indvars.iv to i32, !dbg !16
-  %ld = load i32, i32* %arrayidx, align 4
-  store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18
+  %ld = load i32, ptr %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4, !dbg !16, !tbaa !18
   %cmp3 = icmp sle i32 %ld, %Length, !dbg !22
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12
   %1 = trunc i64 %indvars.iv.next to i32
@@ -163,16 +163,16 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: ret
 
 ; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 !dbg !7 {
+define void @_Z13test_disabledPii(ptr nocapture %A, i32 %Length) #0 !dbg !7 {
 entry:
   %cmp4 = icmp sgt i32 %Length, 0, !dbg !25
   br i1 %cmp4, label %for.body, label %for.end, !dbg !25, !llvm.loop !27
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !30
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !30
   %0 = trunc i64 %indvars.iv to i32, !dbg !30
-  store i32 %0, i32* %arrayidx, align 4, !dbg !30, !tbaa !18
+  store i32 %0, ptr %arrayidx, align 4, !dbg !30, !tbaa !18
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !25
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !25
   %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !25
@@ -187,7 +187,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: ret
 
 ; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 !dbg !8 {
+define void @_Z17test_array_boundsPiS_i(ptr nocapture %A, ptr nocapture readonly %B, i32 %Length) #0 !dbg !8 {
 entry:
   %cmp9 = icmp sgt i32 %Length, 0, !dbg !32
   br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32, !llvm.loop !34
@@ -197,13 +197,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv, !dbg !35
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !35, !tbaa !18
   %idxprom1 = sext i32 %0 to i64, !dbg !35
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
-  %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35
-  store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %idxprom1, !dbg !35
+  %1 = load i32, ptr %arrayidx2, align 4, !dbg !35, !tbaa !18
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !35
+  store i32 %1, ptr %arrayidx4, align 4, !dbg !35, !tbaa !18
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
   %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
@@ -221,21 +221,21 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; CHECK: ret
 
 ; Function Attrs: nounwind uwtable
-define i32 @test_multiple_failures(i32* nocapture readonly %A) #0 !dbg !46 {
+define i32 @test_multiple_failures(ptr nocapture readonly %A) #0 !dbg !46 {
 entry:
   br label %for.body, !dbg !38
 
 for.body:                                         ; preds = %entry, %for.inc
   %i.09 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
   %k.09 = phi i32 [ 0, %entry ], [ %k.1, %for.inc ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.09, !dbg !40
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !40
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.09, !dbg !40
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !40
   %tobool = icmp eq i32 %0, 0, !dbg !40
   br i1 %tobool, label %for.inc, label %if.then, !dbg !40
 
 if.then:                                          ; preds = %for.body
   %call = tail call i32 (...) @foo(), !dbg !41
-  %.pre = load i32, i32* %arrayidx, align 4
+  %.pre = load i32, ptr %arrayidx, align 4
   br label %for.inc, !dbg !42
 
 for.inc:                                          ; preds = %for.body, %if.then

diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
index d589dd50dd27f..2c187caaf93c7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
@@ -23,7 +23,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.10.0"
 
 ; Function Attrs: nounwind uwtable
-define void @do_not_interleave(float** noalias nocapture readonly %in, float* noalias nocapture %out, i32 %size) #0 !dbg !4 {
+define void @do_not_interleave(ptr noalias nocapture readonly %in, ptr noalias nocapture %out, i32 %size) #0 !dbg !4 {
 entry:
   %cmp.4 = icmp eq i32 %size, 0, !dbg !10
   br i1 %cmp.4, label %for.end, label %for.body.preheader, !dbg !11
@@ -33,13 +33,11 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds float*, float** %in, i64 %indvars.iv, !dbg !12
-  %0 = bitcast float** %arrayidx to i32**, !dbg !12
-  %1 = load i32*, i32** %0, align 8, !dbg !12
-  %2 = load i32, i32* %1, align 4, !dbg !13
-  %arrayidx2 = getelementptr inbounds float, float* %out, i64 %indvars.iv, !dbg !14
-  %3 = bitcast float* %arrayidx2 to i32*, !dbg !15
-  store i32 %2, i32* %3, align 4, !dbg !15
+  %arrayidx = getelementptr inbounds ptr, ptr %in, i64 %indvars.iv, !dbg !12
+  %0 = load ptr, ptr %arrayidx, align 8, !dbg !12
+  %1 = load i32, ptr %0, align 4, !dbg !13
+  %arrayidx2 = getelementptr inbounds float, ptr %out, i64 %indvars.iv, !dbg !14
+  store i32 %1, ptr %arrayidx2, align 4, !dbg !15
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !11
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !11
   %exitcond = icmp eq i32 %lftr.wideiv, %size, !dbg !11
@@ -53,20 +51,18 @@ for.end:                                          ; preds = %for.end.loopexit, %
 }
 
 ; Function Attrs: nounwind uwtable
-define void @interleave_not_profitable(float** noalias nocapture readonly %in, float* noalias nocapture %out, i32 %size) #0 !dbg !6 {
+define void @interleave_not_profitable(ptr noalias nocapture readonly %in, ptr noalias nocapture %out, i32 %size) #0 !dbg !6 {
 entry:
   %cmp.4 = icmp eq i32 %size, 0, !dbg !20
   br i1 %cmp.4, label %for.end, label %for.body, !dbg !21
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float*, float** %in, i64 %indvars.iv, !dbg !22
-  %0 = bitcast float** %arrayidx to i32**, !dbg !22
-  %1 = load i32*, i32** %0, align 8, !dbg !22
-  %2 = load i32, i32* %1, align 4, !dbg !23
-  %arrayidx2 = getelementptr inbounds float, float* %out, i64 %indvars.iv, !dbg !24
-  %3 = bitcast float* %arrayidx2 to i32*, !dbg !25
-  store i32 %2, i32* %3, align 4, !dbg !25
+  %arrayidx = getelementptr inbounds ptr, ptr %in, i64 %indvars.iv, !dbg !22
+  %0 = load ptr, ptr %arrayidx, align 8, !dbg !22
+  %1 = load i32, ptr %0, align 4, !dbg !23
+  %arrayidx2 = getelementptr inbounds float, ptr %out, i64 %indvars.iv, !dbg !24
+  store i32 %1, ptr %arrayidx2, align 4, !dbg !25
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !21
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21
   %exitcond = icmp eq i32 %lftr.wideiv, %size, !dbg !21

diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index c0fb4672cf344..63ed666eb6243 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -17,17 +17,17 @@ entry:
   %diff = alloca i32, align 4
   %cb = alloca [16 x i8], align 16
   %cc = alloca [16 x i8], align 16
-  store i32 0, i32* %diff, align 4, !dbg !10, !tbaa !11
+  store i32 0, ptr %diff, align 4, !dbg !10, !tbaa !11
   br label %for.body, !dbg !15
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %add8 = phi i32 [ 0, %entry ], [ %add, %for.body ], !dbg !19
-  %arrayidx = getelementptr inbounds [16 x i8], [16 x i8]* %cb, i64 0, i64 %indvars.iv, !dbg !19
-  %0 = load i8, i8* %arrayidx, align 1, !dbg !19, !tbaa !21
+  %arrayidx = getelementptr inbounds [16 x i8], ptr %cb, i64 0, i64 %indvars.iv, !dbg !19
+  %0 = load i8, ptr %arrayidx, align 1, !dbg !19, !tbaa !21
   %conv = sext i8 %0 to i32, !dbg !19
-  %arrayidx2 = getelementptr inbounds [16 x i8], [16 x i8]* %cc, i64 0, i64 %indvars.iv, !dbg !19
-  %1 = load i8, i8* %arrayidx2, align 1, !dbg !19, !tbaa !21
+  %arrayidx2 = getelementptr inbounds [16 x i8], ptr %cc, i64 0, i64 %indvars.iv, !dbg !19
+  %1 = load i8, ptr %arrayidx2, align 1, !dbg !19, !tbaa !21
   %conv3 = sext i8 %1 to i32, !dbg !19
   %sub = sub i32 %conv, %conv3, !dbg !19
   %add = add nsw i32 %sub, %add8, !dbg !19
@@ -36,12 +36,12 @@ for.body:                                         ; preds = %for.body, %entry
   br i1 %exitcond, label %for.end, label %for.body, !dbg !15
 
 for.end:                                          ; preds = %for.body
-  store i32 %add, i32* %diff, align 4, !dbg !19, !tbaa !11
-  call void @ibar(i32* %diff) #2, !dbg !22
+  store i32 %add, ptr %diff, align 4, !dbg !19, !tbaa !11
+  call void @ibar(ptr %diff) #2, !dbg !22
   ret i32 0, !dbg !23
 }
 
-declare void @ibar(i32*) #1
+declare void @ibar(ptr) #1
 
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}

diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-only-for-real.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-only-for-real.ll
index d811c0f5a1d9c..10ebf9160555e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-only-for-real.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-only-for-real.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.11.0"
 
-define i32 @accum(i32* nocapture readonly %x, i32 %N) #0 {
+define i32 @accum(ptr nocapture readonly %x, i32 %N) #0 {
 entry:
 ; CHECK-LABEL: @accum
 ; CHECK-NOT: x i32>
@@ -16,8 +16,8 @@ for.inc.preheader:
 for.inc:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.inc.preheader ]
   %sum.02 = phi i32 [ %add, %for.inc ], [ 0, %for.inc.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add nsw i32 %0, %sum.02
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32

diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
index fcabe100da6a5..6ed4cda9fb282 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
@@ -29,7 +29,7 @@ target triple = "i386-unknown-linux-gnu"
 ;   }
 ; }
 
-define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {
+define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {
 ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
@@ -45,8 +45,8 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue:
@@ -55,8 +55,8 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue2:
@@ -65,8 +65,8 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue4:
@@ -75,8 +75,8 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue6:
@@ -85,8 +85,8 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue8:
@@ -95,8 +95,8 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue10:
@@ -105,8 +105,8 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue12:
@@ -115,15 +115,14 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue14:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = bitcast i8* [[TMP50]] to <8 x i8>*
-; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP49]], <8 x i8>* [[TMP51]], i32 1, <8 x i1> [[TMP0]])
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]])
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
@@ -142,14 +141,12 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* [[P:%.*]], i32 [[TMP1]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP1]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP2]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <8 x i8>*
-; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP5]], i32 1, <8 x i1> [[TMP0]])
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP4]], i32 1, <8 x i1> [[TMP0]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1016
@@ -160,10 +157,10 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; ENABLED_MASKED_STRIDED:       if.then:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[IX_09]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[MUL]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[IX_09]]
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP7]], i8* [[ARRAYIDX3]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[MUL]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[IX_09]]
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP7]], ptr [[ARRAYIDX3]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[FOR_INC]]
 ; ENABLED_MASKED_STRIDED:       for.inc:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INC]] = add nuw nsw i32 [[IX_09]], 1
@@ -183,10 +180,10 @@ for.body:
 
 if.then:
   %mul = shl nuw nsw i32 %ix.09, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.09
-  store i8 %0, i8* %arrayidx3, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %q, i32 %ix.09
+  store i8 %0, ptr %arrayidx3, align 1
   br label %for.inc
 
 for.inc:
@@ -205,7 +202,7 @@ for.end:
 ; be vectorized with masked wide-loads with the mask properly shuffled and
 ; And-ed with the gaps mask.
 ;
-define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr optsize {
+define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr optsize {
 ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
@@ -221,8 +218,8 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue:
@@ -231,8 +228,8 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue2:
@@ -241,8 +238,8 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue4:
@@ -251,8 +248,8 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue6:
@@ -261,8 +258,8 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue8:
@@ -271,8 +268,8 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue10:
@@ -281,8 +278,8 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue12:
@@ -291,15 +288,14 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue14:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = bitcast i8* [[TMP50]] to <8 x i8>*
-; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP49]], <8 x i8>* [[TMP51]], i32 1, <8 x i1> [[TMP0]])
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]])
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
@@ -318,15 +314,13 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* [[P:%.*]], i32 [[TMP1]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP1]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> poison)
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP2]], i32 1, <16 x i1> [[TMP4]], <16 x i8> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>*
-; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP6]], i32 1, <8 x i1> [[TMP0]])
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP5]], i32 1, <8 x i1> [[TMP0]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
@@ -345,10 +339,10 @@ for.body:
 
 if.then:
   %mul = shl nuw nsw i32 %ix.09, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.09
-  store i8 %0, i8* %arrayidx3, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %q, i32 %ix.09
+  store i8 %0, ptr %arrayidx3, align 1
   br label %for.inc
 
 for.inc:
@@ -387,7 +381,7 @@ for.end:
 ;   }
 ; }
 ;
-define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard, i32 %n) local_unnamed_addr optsize {
+define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard, i32 %n) local_unnamed_addr optsize {
 ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize_unknown_tc(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -413,8 +407,8 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP5]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = insertelement <8 x i8> poison, i8 [[TMP7]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue:
@@ -423,8 +417,8 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP11]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue4:
@@ -433,8 +427,8 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP17]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP17]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue6:
@@ -443,8 +437,8 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP23]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = load i8, i8* [[TMP24]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP23]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue8:
@@ -453,8 +447,8 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP29]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = load i8, i8* [[TMP30]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP29]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue10:
@@ -463,8 +457,8 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP35]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = load i8, i8* [[TMP36]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP35]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue12:
@@ -473,8 +467,8 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP41]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = load i8, i8* [[TMP42]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP41]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = load i8, ptr [[TMP42]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue14:
@@ -483,15 +477,14 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP47]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = load i8, i8* [[TMP48]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP47]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue16:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = bitcast i8* [[TMP52]] to <8 x i8>*
-; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP51]], <8 x i8>* [[TMP53]], i32 1, <8 x i1> [[TMP3]])
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP51]], ptr [[TMP52]], i32 1, <8 x i1> [[TMP3]])
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -519,16 +512,14 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl i32 [[INDEX]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[P:%.*]], i32 [[TMP2]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP2]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[TMP6]], <16 x i8> poison)
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP3]], i32 1, <16 x i1> [[TMP6]], <16 x i8> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>*
-; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP8]], i32 1, <8 x i1> [[TMP4]])
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP7]], i32 1, <8 x i1> [[TMP4]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -551,10 +542,10 @@ for.body:
 
 if.then:
   %mul = shl nuw nsw i32 %ix.010, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.010
-  store i8 %0, i8* %arrayidx3, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %q, i32 %ix.010
+  store i8 %0, ptr %arrayidx3, align 1
   br label %for.inc
 
 for.inc:
@@ -594,7 +585,7 @@ for.end:
 ;   }
 ; }
 ;
-define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard, i32 %n) local_unnamed_addr optsize {
+define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard, i32 %n) local_unnamed_addr optsize {
 ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided3_optsize_unknown_tc(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -620,8 +611,8 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP5]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = insertelement <8 x i8> poison, i8 [[TMP7]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue:
@@ -630,8 +621,8 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP11]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue4:
@@ -640,8 +631,8 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP17]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP17]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue6:
@@ -650,8 +641,8 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP23]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = load i8, i8* [[TMP24]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP23]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue8:
@@ -660,8 +651,8 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP29]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = load i8, i8* [[TMP30]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP29]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue10:
@@ -670,8 +661,8 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP35]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = load i8, i8* [[TMP36]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP35]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue12:
@@ -680,8 +671,8 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP41]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = load i8, i8* [[TMP42]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP41]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = load i8, ptr [[TMP42]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue14:
@@ -690,15 +681,14 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP47]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = load i8, i8* [[TMP48]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP47]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue16:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = bitcast i8* [[TMP52]] to <8 x i8>*
-; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP51]], <8 x i8>* [[TMP53]], i32 1, <8 x i1> [[TMP3]])
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP51]], ptr [[TMP52]], i32 1, <8 x i1> [[TMP3]])
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -726,16 +716,14 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = mul i32 [[INDEX]], 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[P:%.*]], i32 [[TMP2]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP2]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <24 x i8>*
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = and <24 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false>
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <24 x i8> @llvm.masked.load.v24i8.p0v24i8(<24 x i8>* [[TMP5]], i32 1, <24 x i1> [[TMP6]], <24 x i8> poison)
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <24 x i8> @llvm.masked.load.v24i8.p0(ptr [[TMP3]], i32 1, <24 x i1> [[TMP6]], <24 x i8> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x i8> [[WIDE_MASKED_VEC]], <24 x i8> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>*
-; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP8]], i32 1, <8 x i1> [[TMP4]])
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP7]], i32 1, <8 x i1> [[TMP4]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -758,10 +746,10 @@ for.body:
 
 if.then:
   %mul = mul nsw i32 %ix.010, 3
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.010
-  store i8 %0, i8* %arrayidx3, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %q, i32 %ix.010
+  store i8 %0, ptr %arrayidx3, align 1
   br label %for.inc
 
 for.inc:
@@ -797,7 +785,7 @@ for.end:
 ;   }
 ; }
 ;
-define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr optsize {
+define dso_local void @unconditional_strided1_optsize(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr optsize {
 ; DISABLED_MASKED_STRIDED-LABEL: @unconditional_strided1_optsize(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -806,29 +794,29 @@ define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture read
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[TMP0]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP1]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP0]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP3]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP0]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP5]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP5]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP0]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP7]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP7]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP0]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP0]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP11]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[TMP0]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP13]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP13]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP0]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP2]], align 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = load i8, i8* [[TMP4]], align 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, i8* [[TMP6]], align 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = load i8, i8* [[TMP8]], align 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = load i8, i8* [[TMP10]], align 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = load i8, i8* [[TMP12]], align 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP14]], align 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = load i8, i8* [[TMP16]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP2]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = load i8, ptr [[TMP4]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP6]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = load i8, ptr [[TMP8]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = load i8, ptr [[TMP10]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = load i8, ptr [[TMP12]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP14]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = insertelement <8 x i8> poison, i8 [[TMP17]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP18]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = insertelement <8 x i8> [[TMP26]], i8 [[TMP19]], i64 2
@@ -837,9 +825,8 @@ define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture read
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP29]], i8 [[TMP22]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = insertelement <8 x i8> [[TMP30]], i8 [[TMP23]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP24]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = bitcast i8* [[TMP33]] to <8 x i8>*
-; DISABLED_MASKED_STRIDED-NEXT:    store <8 x i8> [[TMP32]], <8 x i8>* [[TMP34]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; DISABLED_MASKED_STRIDED-NEXT:    store <8 x i8> [[TMP32]], ptr [[TMP33]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
@@ -853,13 +840,11 @@ define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture read
 ; ENABLED_MASKED_STRIDED:       vector.body:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = shl nuw nsw i32 [[INDEX]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP0]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <16 x i8>*
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP2]], i32 1, <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i8> poison)
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP0]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP1]], i32 1, <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i8> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <8 x i8>*
-; ENABLED_MASKED_STRIDED-NEXT:    store <8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP4]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; ENABLED_MASKED_STRIDED-NEXT:    store <8 x i8> [[STRIDED_VEC]], ptr [[TMP3]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -872,10 +857,10 @@ entry:
 for.body:
   %ix.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %mul = shl nuw nsw i32 %ix.06, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx1 = getelementptr inbounds i8, i8* %q, i32 %ix.06
-  store i8 %0, i8* %arrayidx1, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %q, i32 %ix.06
+  store i8 %0, ptr %arrayidx1, align 1
   %inc = add nuw nsw i32 %ix.06, 1
   %exitcond = icmp eq i32 %inc, 1024
   br i1 %exitcond, label %for.end, label %for.body
@@ -905,7 +890,7 @@ for.end:
 ;         q[ix] = t;
 ;   }
 ;
-define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %n) local_unnamed_addr optsize {
+define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %n) local_unnamed_addr optsize {
 ; DISABLED_MASKED_STRIDED-LABEL: @unconditional_strided1_optsize_unknown_tc(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -926,8 +911,8 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue:
@@ -936,8 +921,8 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue2:
@@ -946,8 +931,8 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue4:
@@ -956,8 +941,8 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue6:
@@ -966,8 +951,8 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue8:
@@ -976,8 +961,8 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue10:
@@ -986,8 +971,8 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue12:
@@ -996,15 +981,14 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue14:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = bitcast i8* [[TMP50]] to <8 x i8>*
-; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP49]], <8 x i8>* [[TMP51]], i32 1, <8 x i1> [[TMP0]])
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]])
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -1030,15 +1014,13 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IV:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP1]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> poison)
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP2]], i32 1, <16 x i1> [[TMP4]], <16 x i8> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>*
-; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP6]], i32 1, <8 x i1> [[TMP0]])
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]]
+; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP5]], i32 1, <8 x i1> [[TMP0]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -1055,10 +1037,10 @@ for.body.preheader:
 for.body:
   %ix.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %mul = shl nuw nsw i32 %ix.07, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx1 = getelementptr inbounds i8, i8* %q, i32 %ix.07
-  store i8 %0, i8* %arrayidx1, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %q, i32 %ix.07
+  store i8 %0, ptr %arrayidx1, align 1
   %inc = add nuw nsw i32 %ix.07, 1
   %exitcond = icmp eq i32 %inc, %n
   br i1 %exitcond, label %for.end.loopexit, label %for.body
@@ -1095,7 +1077,7 @@ for.end:
 ; }
 ;}
 ;
-define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr  {
+define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr  {
 ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
@@ -1111,8 +1093,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue:
@@ -1121,8 +1103,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue2:
@@ -1131,8 +1113,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue4:
@@ -1141,8 +1123,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue6:
@@ -1151,8 +1133,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue8:
@@ -1161,8 +1143,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue10:
@@ -1171,8 +1153,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue12:
@@ -1181,8 +1163,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue14:
@@ -1192,8 +1174,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP52]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = load i8, i8* [[TMP53]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP52]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = insertelement <8 x i8> poison, i8 [[TMP54]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue16:
@@ -1202,8 +1184,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if17:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP58]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = load i8, i8* [[TMP59]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP58]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = load i8, ptr [[TMP59]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue18:
@@ -1212,8 +1194,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if19:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP64]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = load i8, i8* [[TMP65]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP64]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = load i8, ptr [[TMP65]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue20:
@@ -1222,8 +1204,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if21:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP70]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = load i8, i8* [[TMP71]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP70]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue22:
@@ -1232,8 +1214,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if23:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP76]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = load i8, i8* [[TMP77]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP76]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = load i8, ptr [[TMP77]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue24:
@@ -1242,8 +1224,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if25:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP82]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = load i8, i8* [[TMP83]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP82]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue26:
@@ -1252,8 +1234,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if27:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP88]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = load i8, i8* [[TMP89]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP88]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue28:
@@ -1262,8 +1244,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if29:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP94]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = load i8, i8* [[TMP95]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP94]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = load i8, ptr [[TMP95]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue30:
@@ -1273,72 +1255,72 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP100]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP101]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP101]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP103:%.*]] = extractelement <8 x i8> [[TMP99]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP103]], i8* [[TMP102]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP103]], ptr [[TMP102]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP104:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP104]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if31:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP105:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP105]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP105]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = extractelement <8 x i8> [[TMP99]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP107]], i8* [[TMP106]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP107]], ptr [[TMP106]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE32]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue32:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP108:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP108]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if33:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP109]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP109]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = extractelement <8 x i8> [[TMP99]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP111]], i8* [[TMP110]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP111]], ptr [[TMP110]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE34]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue34:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP112:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP112]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if35:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP113]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP113]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = extractelement <8 x i8> [[TMP99]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP115]], i8* [[TMP114]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP115]], ptr [[TMP114]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE36]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue36:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP116:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP116]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if37:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP117]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP117]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = extractelement <8 x i8> [[TMP99]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP119]], i8* [[TMP118]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP119]], ptr [[TMP118]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE38]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue38:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP120:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP120]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if39:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP121]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP121]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = extractelement <8 x i8> [[TMP99]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP123]], i8* [[TMP122]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP123]], ptr [[TMP122]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE40]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue40:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP124:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP124]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if41:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP125]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP125]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = extractelement <8 x i8> [[TMP99]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP127]], i8* [[TMP126]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP127]], ptr [[TMP126]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE42]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue42:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP128:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP128]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if43:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP129]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP129]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = extractelement <8 x i8> [[TMP99]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP131]], i8* [[TMP130]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP131]], ptr [[TMP130]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue44:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
@@ -1346,72 +1328,72 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if45:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP134]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP134]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i8> [[TMP132]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], i8* [[TMP135]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue46:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP138]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP138]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = extractelement <8 x i8> [[TMP132]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], i8* [[TMP139]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], ptr [[TMP139]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue48:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP142]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP142]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], i8* [[TMP143]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], ptr [[TMP143]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue50:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP146]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP146]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], i8* [[TMP147]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], ptr [[TMP147]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue52:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP150]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP150]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], i8* [[TMP151]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], ptr [[TMP151]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue54:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP154]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP154]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], i8* [[TMP155]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], ptr [[TMP155]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue56:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP158]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP158]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], i8* [[TMP159]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], ptr [[TMP159]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue58:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP162]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP162]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP164]], i8* [[TMP163]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP164]], ptr [[TMP163]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue60:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -1432,20 +1414,18 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* [[P:%.*]], i32 [[TMP1]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP1]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP2]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC1]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = sub <8 x i8> zeroinitializer, [[TMP5]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 -1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[TMP7]], i32 [[TMP4]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <16 x i8>*
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 -1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 [[TMP4]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP9]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
+; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP8]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
@@ -1464,18 +1444,18 @@ for.body:
 
 if.then:
   %mul = shl nuw nsw i32 %ix.024, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
   %add = or i32 %mul, 1
-  %arrayidx4 = getelementptr inbounds i8, i8* %p, i32 %add
-  %1 = load i8, i8* %arrayidx4, align 1
+  %arrayidx4 = getelementptr inbounds i8, ptr %p, i32 %add
+  %1 = load i8, ptr %arrayidx4, align 1
   %cmp.i = icmp slt i8 %0, %1
   %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0
-  %arrayidx6 = getelementptr inbounds i8, i8* %q, i32 %mul
-  store i8 %spec.select.i, i8* %arrayidx6, align 1
+  %arrayidx6 = getelementptr inbounds i8, ptr %q, i32 %mul
+  store i8 %spec.select.i, ptr %arrayidx6, align 1
   %sub = sub i8 0, %spec.select.i
-  %arrayidx11 = getelementptr inbounds i8, i8* %q, i32 %add
-  store i8 %sub, i8* %arrayidx11, align 1
+  %arrayidx11 = getelementptr inbounds i8, ptr %q, i32 %add
+  store i8 %sub, ptr %arrayidx11, align 1
   br label %for.inc
 
 for.inc:
@@ -1504,16 +1484,16 @@ for.end:
 ; }
 ;}
 ;
-define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr  {
+define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr  {
 ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2_reverse(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
-; DISABLED_MASKED_STRIDED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 2048
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr i8, i8* [[Q]], i32 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ugt i8* [[TMP0]], [[SCEVGEP]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, i8* [[Q]], i32 2049
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* [[Q]], i32 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = icmp ugt i8* [[TMP2]], [[SCEVGEP2]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 2048
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[Q]], i32 2
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ugt ptr [[TMP0]], [[SCEVGEP]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[Q]], i32 2049
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[Q]], i32 3
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = icmp ugt ptr [[TMP2]], [[SCEVGEP2]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]]
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
 ; DISABLED_MASKED_STRIDED:       vector.ph:
@@ -1529,8 +1509,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP7]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP8]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = load i8, i8* [[TMP9]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP8]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = insertelement <8 x i8> poison, i8 [[TMP10]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue:
@@ -1539,8 +1519,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP13]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if6:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP14]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = load i8, i8* [[TMP15]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP14]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = load i8, ptr [[TMP15]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = insertelement <8 x i8> [[TMP12]], i8 [[TMP16]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE7]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue7:
@@ -1549,8 +1529,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP19]], label [[PRED_LOAD_IF8:%.*]], label [[PRED_LOAD_CONTINUE9:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if8:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP20]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = load i8, i8* [[TMP21]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP20]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = load i8, ptr [[TMP21]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = insertelement <8 x i8> [[TMP18]], i8 [[TMP22]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE9]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue9:
@@ -1559,8 +1539,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP25]], label [[PRED_LOAD_IF10:%.*]], label [[PRED_LOAD_CONTINUE11:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if10:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP26]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = load i8, i8* [[TMP27]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP26]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = load i8, ptr [[TMP27]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = insertelement <8 x i8> [[TMP24]], i8 [[TMP28]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE11]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue11:
@@ -1569,8 +1549,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP31]], label [[PRED_LOAD_IF12:%.*]], label [[PRED_LOAD_CONTINUE13:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if12:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP32]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = load i8, i8* [[TMP33]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP32]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = load i8, ptr [[TMP33]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = insertelement <8 x i8> [[TMP30]], i8 [[TMP34]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE13]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue13:
@@ -1579,8 +1559,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP37]], label [[PRED_LOAD_IF14:%.*]], label [[PRED_LOAD_CONTINUE15:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if14:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP38]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = load i8, i8* [[TMP39]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP38]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = load i8, ptr [[TMP39]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = insertelement <8 x i8> [[TMP36]], i8 [[TMP40]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE15]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue15:
@@ -1589,8 +1569,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP43]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if16:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP44]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = load i8, i8* [[TMP45]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP44]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = load i8, ptr [[TMP45]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = insertelement <8 x i8> [[TMP42]], i8 [[TMP46]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE17]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue17:
@@ -1599,8 +1579,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP49]], label [[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if18:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP50]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = load i8, i8* [[TMP51]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP50]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = load i8, ptr [[TMP51]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = insertelement <8 x i8> [[TMP48]], i8 [[TMP52]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE19]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue19:
@@ -1610,8 +1590,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP56]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if20:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP57:%.*]] = extractelement <8 x i32> [[TMP55]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP57]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = load i8, i8* [[TMP58]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP57]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = load i8, ptr [[TMP58]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = insertelement <8 x i8> poison, i8 [[TMP59]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE21]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue21:
@@ -1620,8 +1600,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP62]], label [[PRED_LOAD_IF22:%.*]], label [[PRED_LOAD_CONTINUE23:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if22:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP63:%.*]] = extractelement <8 x i32> [[TMP55]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP63]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = load i8, i8* [[TMP64]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP63]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = load i8, ptr [[TMP64]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = insertelement <8 x i8> [[TMP61]], i8 [[TMP65]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE23]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue23:
@@ -1630,8 +1610,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP68]], label [[PRED_LOAD_IF24:%.*]], label [[PRED_LOAD_CONTINUE25:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if24:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP69:%.*]] = extractelement <8 x i32> [[TMP55]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP69]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = load i8, i8* [[TMP70]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP69]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = load i8, ptr [[TMP70]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = insertelement <8 x i8> [[TMP67]], i8 [[TMP71]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE25]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue25:
@@ -1640,8 +1620,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP74]], label [[PRED_LOAD_IF26:%.*]], label [[PRED_LOAD_CONTINUE27:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if26:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP75:%.*]] = extractelement <8 x i32> [[TMP55]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP75]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = load i8, i8* [[TMP76]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP75]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = load i8, ptr [[TMP76]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = insertelement <8 x i8> [[TMP73]], i8 [[TMP77]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE27]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue27:
@@ -1650,8 +1630,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP80]], label [[PRED_LOAD_IF28:%.*]], label [[PRED_LOAD_CONTINUE29:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if28:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP81:%.*]] = extractelement <8 x i32> [[TMP55]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP81]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = load i8, i8* [[TMP82]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP81]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = load i8, ptr [[TMP82]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = insertelement <8 x i8> [[TMP79]], i8 [[TMP83]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE29]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue29:
@@ -1660,8 +1640,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP86]], label [[PRED_LOAD_IF30:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if30:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP87:%.*]] = extractelement <8 x i32> [[TMP55]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP87]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = load i8, i8* [[TMP88]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP87]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = load i8, ptr [[TMP88]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = insertelement <8 x i8> [[TMP85]], i8 [[TMP89]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE31]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue31:
@@ -1670,8 +1650,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP92]], label [[PRED_LOAD_IF32:%.*]], label [[PRED_LOAD_CONTINUE33:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if32:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP93:%.*]] = extractelement <8 x i32> [[TMP55]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP93]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = load i8, i8* [[TMP94]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP93]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = load i8, ptr [[TMP94]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = insertelement <8 x i8> [[TMP91]], i8 [[TMP95]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE33]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue33:
@@ -1680,8 +1660,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP98]], label [[PRED_LOAD_IF34:%.*]], label [[PRED_LOAD_CONTINUE35:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if34:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP99:%.*]] = extractelement <8 x i32> [[TMP55]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP100:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP99]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = load i8, i8* [[TMP100]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP100:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP99]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = load i8, ptr [[TMP100]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = insertelement <8 x i8> [[TMP97]], i8 [[TMP101]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE35]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue35:
@@ -1691,72 +1671,72 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP105]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP106]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP106]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP108:%.*]] = extractelement <8 x i8> [[TMP104]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP108]], i8* [[TMP107]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP108]], ptr [[TMP107]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP109]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if36:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP110]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP110]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP112:%.*]] = extractelement <8 x i8> [[TMP104]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP112]], i8* [[TMP111]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP112]], ptr [[TMP111]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE37]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue37:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP113]], label [[PRED_STORE_IF38:%.*]], label [[PRED_STORE_CONTINUE39:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if38:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP114]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP114]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP116:%.*]] = extractelement <8 x i8> [[TMP104]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP116]], i8* [[TMP115]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP116]], ptr [[TMP115]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE39]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue39:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP117]], label [[PRED_STORE_IF40:%.*]], label [[PRED_STORE_CONTINUE41:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if40:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP118]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP118]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP120:%.*]] = extractelement <8 x i8> [[TMP104]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP120]], i8* [[TMP119]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP120]], ptr [[TMP119]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE41]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue41:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP121]], label [[PRED_STORE_IF42:%.*]], label [[PRED_STORE_CONTINUE43:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if42:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP122]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP122]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP124:%.*]] = extractelement <8 x i8> [[TMP104]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP124]], i8* [[TMP123]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP124]], ptr [[TMP123]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE43]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue43:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP125]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if44:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP126]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP126]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP128:%.*]] = extractelement <8 x i8> [[TMP104]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP128]], i8* [[TMP127]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP128]], ptr [[TMP127]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE45]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue45:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP129]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if46:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP130]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP130]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = extractelement <8 x i8> [[TMP104]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP132]], i8* [[TMP131]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP132]], ptr [[TMP131]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE47]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP133:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if48:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP134]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP134]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i8> [[TMP104]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], i8* [[TMP135]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE49]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = sub <8 x i8> zeroinitializer, [[TMP104]]
@@ -1764,72 +1744,72 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP138]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if50:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = extractelement <8 x i32> [[TMP55]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP139]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP139]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = extractelement <8 x i8> [[TMP137]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP141]], i8* [[TMP140]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP141]], ptr [[TMP140]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE51]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP142]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE53:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if52:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = extractelement <8 x i32> [[TMP55]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP143]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP143]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = extractelement <8 x i8> [[TMP137]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP145]], i8* [[TMP144]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP145]], ptr [[TMP144]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE53]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP146]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE55:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if54:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = extractelement <8 x i32> [[TMP55]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP147]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP147]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = extractelement <8 x i8> [[TMP137]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP149]], i8* [[TMP148]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP149]], ptr [[TMP148]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE55]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP150]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE57:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if56:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = extractelement <8 x i32> [[TMP55]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP151]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP151]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = extractelement <8 x i8> [[TMP137]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP153]], i8* [[TMP152]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP153]], ptr [[TMP152]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE57]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP154]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE59:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if58:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = extractelement <8 x i32> [[TMP55]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP155]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP155]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = extractelement <8 x i8> [[TMP137]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP157]], i8* [[TMP156]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP157]], ptr [[TMP156]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE59]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP158]], label [[PRED_STORE_IF60:%.*]], label [[PRED_STORE_CONTINUE61:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if60:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = extractelement <8 x i32> [[TMP55]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP159]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP159]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = extractelement <8 x i8> [[TMP137]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP161]], i8* [[TMP160]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP161]], ptr [[TMP160]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE61]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue61:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP162]], label [[PRED_STORE_IF62:%.*]], label [[PRED_STORE_CONTINUE63:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if62:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = extractelement <8 x i32> [[TMP55]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP163]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP163]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = extractelement <8 x i8> [[TMP137]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP165]], i8* [[TMP164]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP165]], ptr [[TMP164]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE63]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue63:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP166:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP166]], label [[PRED_STORE_IF64:%.*]], label [[PRED_STORE_CONTINUE65]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if64:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP167:%.*]] = extractelement <8 x i32> [[TMP55]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP168:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP167]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP168:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP167]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP169:%.*]] = extractelement <8 x i8> [[TMP137]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP169]], i8* [[TMP168]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP169]], ptr [[TMP168]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE65]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue65:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -1842,17 +1822,17 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; DISABLED_MASKED_STRIDED:       if.then:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[MUL]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP171:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[MUL]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP171:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[ADD:%.*]] = or i32 [[MUL]], 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[ADD]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP172:%.*]] = load i8, i8* [[ARRAYIDX4]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[ADD]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP172:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP173:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP171]], i8 [[TMP172]])
-; DISABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[MUL]]
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP173]], i8* [[ARRAYIDX6]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[MUL]]
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP173]], ptr [[ARRAYIDX6]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[SUB:%.*]] = sub i8 0, [[TMP173]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[ADD]]
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[SUB]], i8* [[ARRAYIDX11]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[ADD]]
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[SUB]], ptr [[ARRAYIDX11]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[FOR_INC]]
 ; DISABLED_MASKED_STRIDED:       for.inc:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INC]] = add nsw i32 [[IX_024]], -1
@@ -1864,12 +1844,12 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-LABEL: @masked_strided2_reverse(
 ; ENABLED_MASKED_STRIDED-NEXT:  entry:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
-; ENABLED_MASKED_STRIDED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 2048
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr i8, i8* [[Q]], i32 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ugt i8* [[TMP0]], [[SCEVGEP]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, i8* [[Q]], i32 2049
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* [[Q]], i32 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = icmp ugt i8* [[TMP2]], [[SCEVGEP2]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 2048
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[Q]], i32 2
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ugt ptr [[TMP0]], [[SCEVGEP]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[Q]], i32 2049
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[Q]], i32 3
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = icmp ugt ptr [[TMP2]], [[SCEVGEP2]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]]
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
 ; ENABLED_MASKED_STRIDED:       vector.ph:
@@ -1885,8 +1865,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP7]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP8]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = load i8, i8* [[TMP9]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP8]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = insertelement <8 x i8> poison, i8 [[TMP10]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue:
@@ -1895,8 +1875,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP13]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if6:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP14]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = load i8, i8* [[TMP15]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP14]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = load i8, ptr [[TMP15]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = insertelement <8 x i8> [[TMP12]], i8 [[TMP16]], i64 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE7]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue7:
@@ -1905,8 +1885,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP19]], label [[PRED_LOAD_IF8:%.*]], label [[PRED_LOAD_CONTINUE9:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if8:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP20]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = load i8, i8* [[TMP21]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP20]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = load i8, ptr [[TMP21]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = insertelement <8 x i8> [[TMP18]], i8 [[TMP22]], i64 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE9]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue9:
@@ -1915,8 +1895,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP25]], label [[PRED_LOAD_IF10:%.*]], label [[PRED_LOAD_CONTINUE11:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if10:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP26]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = load i8, i8* [[TMP27]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP26]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = load i8, ptr [[TMP27]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = insertelement <8 x i8> [[TMP24]], i8 [[TMP28]], i64 3
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE11]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue11:
@@ -1925,8 +1905,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP31]], label [[PRED_LOAD_IF12:%.*]], label [[PRED_LOAD_CONTINUE13:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if12:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP32]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = load i8, i8* [[TMP33]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP32]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = load i8, ptr [[TMP33]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = insertelement <8 x i8> [[TMP30]], i8 [[TMP34]], i64 4
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE13]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue13:
@@ -1935,8 +1915,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP37]], label [[PRED_LOAD_IF14:%.*]], label [[PRED_LOAD_CONTINUE15:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if14:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP38]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = load i8, i8* [[TMP39]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP38]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = load i8, ptr [[TMP39]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = insertelement <8 x i8> [[TMP36]], i8 [[TMP40]], i64 5
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE15]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue15:
@@ -1945,8 +1925,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP43]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if16:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP44]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = load i8, i8* [[TMP45]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP44]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = load i8, ptr [[TMP45]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = insertelement <8 x i8> [[TMP42]], i8 [[TMP46]], i64 6
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE17]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue17:
@@ -1955,8 +1935,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP49]], label [[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if18:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP50]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = load i8, i8* [[TMP51]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP50]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = load i8, ptr [[TMP51]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = insertelement <8 x i8> [[TMP48]], i8 [[TMP52]], i64 7
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE19]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue19:
@@ -1966,8 +1946,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP56]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if20:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP57:%.*]] = extractelement <8 x i32> [[TMP55]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP57]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = load i8, i8* [[TMP58]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP57]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = load i8, ptr [[TMP58]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = insertelement <8 x i8> poison, i8 [[TMP59]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE21]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue21:
@@ -1976,8 +1956,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP62]], label [[PRED_LOAD_IF22:%.*]], label [[PRED_LOAD_CONTINUE23:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if22:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP63:%.*]] = extractelement <8 x i32> [[TMP55]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP63]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = load i8, i8* [[TMP64]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP63]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = load i8, ptr [[TMP64]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = insertelement <8 x i8> [[TMP61]], i8 [[TMP65]], i64 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE23]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue23:
@@ -1986,8 +1966,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP68]], label [[PRED_LOAD_IF24:%.*]], label [[PRED_LOAD_CONTINUE25:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if24:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP69:%.*]] = extractelement <8 x i32> [[TMP55]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP69]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = load i8, i8* [[TMP70]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP69]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = load i8, ptr [[TMP70]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = insertelement <8 x i8> [[TMP67]], i8 [[TMP71]], i64 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE25]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue25:
@@ -1996,8 +1976,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP74]], label [[PRED_LOAD_IF26:%.*]], label [[PRED_LOAD_CONTINUE27:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if26:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP75:%.*]] = extractelement <8 x i32> [[TMP55]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP75]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = load i8, i8* [[TMP76]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP75]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = load i8, ptr [[TMP76]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = insertelement <8 x i8> [[TMP73]], i8 [[TMP77]], i64 3
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE27]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue27:
@@ -2006,8 +1986,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP80]], label [[PRED_LOAD_IF28:%.*]], label [[PRED_LOAD_CONTINUE29:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if28:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP81:%.*]] = extractelement <8 x i32> [[TMP55]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP81]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = load i8, i8* [[TMP82]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP81]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = load i8, ptr [[TMP82]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = insertelement <8 x i8> [[TMP79]], i8 [[TMP83]], i64 4
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE29]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue29:
@@ -2016,8 +1996,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP86]], label [[PRED_LOAD_IF30:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if30:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP87:%.*]] = extractelement <8 x i32> [[TMP55]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP87]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = load i8, i8* [[TMP88]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP87]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = load i8, ptr [[TMP88]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = insertelement <8 x i8> [[TMP85]], i8 [[TMP89]], i64 5
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE31]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue31:
@@ -2026,8 +2006,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP92]], label [[PRED_LOAD_IF32:%.*]], label [[PRED_LOAD_CONTINUE33:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if32:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP93:%.*]] = extractelement <8 x i32> [[TMP55]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP93]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = load i8, i8* [[TMP94]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP93]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = load i8, ptr [[TMP94]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = insertelement <8 x i8> [[TMP91]], i8 [[TMP95]], i64 6
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE33]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue33:
@@ -2036,8 +2016,8 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP98]], label [[PRED_LOAD_IF34:%.*]], label [[PRED_LOAD_CONTINUE35:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if34:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP99:%.*]] = extractelement <8 x i32> [[TMP55]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP100:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP99]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = load i8, i8* [[TMP100]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP100:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP99]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = load i8, ptr [[TMP100]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = insertelement <8 x i8> [[TMP97]], i8 [[TMP101]], i64 7
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE35]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue35:
@@ -2047,72 +2027,72 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP105]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP106]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP106]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP108:%.*]] = extractelement <8 x i8> [[TMP104]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP108]], i8* [[TMP107]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP108]], ptr [[TMP107]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP109]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if36:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP110]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP110]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP112:%.*]] = extractelement <8 x i8> [[TMP104]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP112]], i8* [[TMP111]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP112]], ptr [[TMP111]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE37]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue37:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP113]], label [[PRED_STORE_IF38:%.*]], label [[PRED_STORE_CONTINUE39:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if38:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP114]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP114]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP116:%.*]] = extractelement <8 x i8> [[TMP104]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP116]], i8* [[TMP115]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP116]], ptr [[TMP115]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE39]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue39:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP117]], label [[PRED_STORE_IF40:%.*]], label [[PRED_STORE_CONTINUE41:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if40:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP118]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP118]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP120:%.*]] = extractelement <8 x i8> [[TMP104]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP120]], i8* [[TMP119]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP120]], ptr [[TMP119]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE41]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue41:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP121]], label [[PRED_STORE_IF42:%.*]], label [[PRED_STORE_CONTINUE43:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if42:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP122]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP122]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP124:%.*]] = extractelement <8 x i8> [[TMP104]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP124]], i8* [[TMP123]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP124]], ptr [[TMP123]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE43]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue43:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP125]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if44:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP126]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP126]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP128:%.*]] = extractelement <8 x i8> [[TMP104]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP128]], i8* [[TMP127]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP128]], ptr [[TMP127]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE45]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue45:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP129]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if46:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP130]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP130]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = extractelement <8 x i8> [[TMP104]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP132]], i8* [[TMP131]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP132]], ptr [[TMP131]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE47]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue47:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP133:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if48:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP134]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP134]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i8> [[TMP104]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], i8* [[TMP135]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE49]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue49:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = sub <8 x i8> zeroinitializer, [[TMP104]]
@@ -2120,72 +2100,72 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP138]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if50:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = extractelement <8 x i32> [[TMP55]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP139]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP139]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = extractelement <8 x i8> [[TMP137]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP141]], i8* [[TMP140]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP141]], ptr [[TMP140]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE51]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue51:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP142]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE53:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if52:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = extractelement <8 x i32> [[TMP55]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP143]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP143]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = extractelement <8 x i8> [[TMP137]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP145]], i8* [[TMP144]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP145]], ptr [[TMP144]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE53]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue53:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP146]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE55:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if54:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = extractelement <8 x i32> [[TMP55]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP147]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP147]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = extractelement <8 x i8> [[TMP137]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP149]], i8* [[TMP148]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP149]], ptr [[TMP148]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE55]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue55:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP150]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE57:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if56:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = extractelement <8 x i32> [[TMP55]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP151]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP151]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = extractelement <8 x i8> [[TMP137]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP153]], i8* [[TMP152]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP153]], ptr [[TMP152]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE57]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue57:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP154]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE59:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if58:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = extractelement <8 x i32> [[TMP55]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP155]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP155]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = extractelement <8 x i8> [[TMP137]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP157]], i8* [[TMP156]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP157]], ptr [[TMP156]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE59]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue59:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP158]], label [[PRED_STORE_IF60:%.*]], label [[PRED_STORE_CONTINUE61:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if60:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = extractelement <8 x i32> [[TMP55]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP159]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP159]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = extractelement <8 x i8> [[TMP137]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP161]], i8* [[TMP160]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP161]], ptr [[TMP160]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE61]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue61:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP162]], label [[PRED_STORE_IF62:%.*]], label [[PRED_STORE_CONTINUE63:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if62:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = extractelement <8 x i32> [[TMP55]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP163]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP163]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = extractelement <8 x i8> [[TMP137]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP165]], i8* [[TMP164]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP165]], ptr [[TMP164]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE63]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue63:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP166:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP166]], label [[PRED_STORE_IF64:%.*]], label [[PRED_STORE_CONTINUE65]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if64:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP167:%.*]] = extractelement <8 x i32> [[TMP55]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP168:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP167]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP168:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP167]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP169:%.*]] = extractelement <8 x i8> [[TMP137]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP169]], i8* [[TMP168]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP169]], ptr [[TMP168]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE65]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue65:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -2198,17 +2178,17 @@ define dso_local void @masked_strided2_reverse(i8* noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; ENABLED_MASKED_STRIDED:       if.then:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[MUL]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP171:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[MUL]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP171:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[ADD:%.*]] = or i32 [[MUL]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[ADD]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP172:%.*]] = load i8, i8* [[ARRAYIDX4]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[ADD]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP172:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP173:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP171]], i8 [[TMP172]])
-; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[MUL]]
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP173]], i8* [[ARRAYIDX6]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[MUL]]
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP173]], ptr [[ARRAYIDX6]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[SUB:%.*]] = sub i8 0, [[TMP173]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[ADD]]
-; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[SUB]], i8* [[ARRAYIDX11]], align 1
+; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[ADD]]
+; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[SUB]], ptr [[ARRAYIDX11]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[FOR_INC]]
 ; ENABLED_MASKED_STRIDED:       for.inc:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INC]] = add nsw i32 [[IX_024]], -1
@@ -2228,18 +2208,18 @@ for.body:
 
 if.then:
   %mul = shl nuw nsw i32 %ix.024, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
   %add = or i32 %mul, 1
-  %arrayidx4 = getelementptr inbounds i8, i8* %p, i32 %add
-  %1 = load i8, i8* %arrayidx4, align 1
+  %arrayidx4 = getelementptr inbounds i8, ptr %p, i32 %add
+  %1 = load i8, ptr %arrayidx4, align 1
   %cmp.i = icmp slt i8 %0, %1
   %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0
-  %arrayidx6 = getelementptr inbounds i8, i8* %q, i32 %mul
-  store i8 %spec.select.i, i8* %arrayidx6, align 1
+  %arrayidx6 = getelementptr inbounds i8, ptr %q, i32 %mul
+  store i8 %spec.select.i, ptr %arrayidx6, align 1
   %sub = sub i8 0, %spec.select.i
-  %arrayidx11 = getelementptr inbounds i8, i8* %q, i32 %add
-  store i8 %sub, i8* %arrayidx11, align 1
+  %arrayidx11 = getelementptr inbounds i8, ptr %q, i32 %add
+  store i8 %sub, ptr %arrayidx11, align 1
   br label %for.inc
 
 for.inc:
@@ -2277,7 +2257,7 @@ for.end:
 ; }
 ;}
 ;
-define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %guard, i32 %n) local_unnamed_addr optsize {
+define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %guard, i32 %n) local_unnamed_addr optsize {
 ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2_unknown_tc(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[CMP22:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -2302,8 +2282,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP5]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = insertelement <8 x i8> poison, i8 [[TMP7]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue:
@@ -2312,8 +2292,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP11]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue4:
@@ -2322,8 +2302,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP17]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP17]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue6:
@@ -2332,8 +2312,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP23]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = load i8, i8* [[TMP24]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP23]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue8:
@@ -2342,8 +2322,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP29]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = load i8, i8* [[TMP30]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP29]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue10:
@@ -2352,8 +2332,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP35]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = load i8, i8* [[TMP36]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP35]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue12:
@@ -2362,8 +2342,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP41]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = load i8, i8* [[TMP42]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP41]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = load i8, ptr [[TMP42]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue14:
@@ -2372,8 +2352,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP47]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = load i8, i8* [[TMP48]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP47]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue16:
@@ -2383,8 +2363,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP53]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if17:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP54]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP56:%.*]] = load i8, i8* [[TMP55]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP54]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP57:%.*]] = insertelement <8 x i8> poison, i8 [[TMP56]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue18:
@@ -2393,8 +2373,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP59]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if19:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP60]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP62:%.*]] = load i8, i8* [[TMP61]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP60]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP62:%.*]] = load i8, ptr [[TMP61]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP63:%.*]] = insertelement <8 x i8> [[TMP58]], i8 [[TMP62]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue20:
@@ -2403,8 +2383,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP65]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if21:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP66]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP68:%.*]] = load i8, i8* [[TMP67]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP66]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP68:%.*]] = load i8, ptr [[TMP67]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP69:%.*]] = insertelement <8 x i8> [[TMP64]], i8 [[TMP68]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue22:
@@ -2413,8 +2393,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP71]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if23:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP72]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP74:%.*]] = load i8, i8* [[TMP73]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP72]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP74:%.*]] = load i8, ptr [[TMP73]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP75:%.*]] = insertelement <8 x i8> [[TMP70]], i8 [[TMP74]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue24:
@@ -2423,8 +2403,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP77]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if25:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP78]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP80:%.*]] = load i8, i8* [[TMP79]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP78]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP80:%.*]] = load i8, ptr [[TMP79]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP81:%.*]] = insertelement <8 x i8> [[TMP76]], i8 [[TMP80]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue26:
@@ -2433,8 +2413,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP83]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if27:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP84]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP86:%.*]] = load i8, i8* [[TMP85]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP84]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP86:%.*]] = load i8, ptr [[TMP85]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP87:%.*]] = insertelement <8 x i8> [[TMP82]], i8 [[TMP86]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue28:
@@ -2443,8 +2423,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP89]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if29:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP90]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP92:%.*]] = load i8, i8* [[TMP91]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP90]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP92:%.*]] = load i8, ptr [[TMP91]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP93:%.*]] = insertelement <8 x i8> [[TMP88]], i8 [[TMP92]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue30:
@@ -2453,8 +2433,8 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP95]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if31:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP96]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP98:%.*]] = load i8, i8* [[TMP97]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP96]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP98:%.*]] = load i8, ptr [[TMP97]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP99:%.*]] = insertelement <8 x i8> [[TMP94]], i8 [[TMP98]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE32]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue32:
@@ -2464,72 +2444,72 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP102]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP103:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP104:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP103]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP103]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP105:%.*]] = extractelement <8 x i8> [[TMP101]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP105]], i8* [[TMP104]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP105]], ptr [[TMP104]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP106]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if33:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP108:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP107]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP108:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP107]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i8> [[TMP101]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP109]], i8* [[TMP108]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP109]], ptr [[TMP108]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE34]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue34:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP110]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if35:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP112:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP111]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP112:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP111]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i8> [[TMP101]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP113]], i8* [[TMP112]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP113]], ptr [[TMP112]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE36]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue36:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP114]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if37:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP116:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP115]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP116:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP115]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i8> [[TMP101]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP117]], i8* [[TMP116]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP117]], ptr [[TMP116]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE38]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue38:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP118]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if39:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP120:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP119]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP120:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP119]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i8> [[TMP101]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP121]], i8* [[TMP120]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP121]], ptr [[TMP120]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE40]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue40:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP122]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if41:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP124:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP123]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP123]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i8> [[TMP101]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP125]], i8* [[TMP124]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP125]], ptr [[TMP124]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE42]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue42:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP126]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if43:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP128:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP127]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP128:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP127]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i8> [[TMP101]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP129]], i8* [[TMP128]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP129]], ptr [[TMP128]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue44:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP130]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if45:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP131]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP131]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP133:%.*]] = extractelement <8 x i8> [[TMP101]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP133]], i8* [[TMP132]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP133]], ptr [[TMP132]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue46:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = sub <8 x i8> zeroinitializer, [[TMP101]]
@@ -2537,72 +2517,72 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP135]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP136]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP136]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i8> [[TMP134]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP138]], i8* [[TMP137]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP138]], ptr [[TMP137]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue48:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP139]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP140]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP140]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i8> [[TMP134]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP142]], i8* [[TMP141]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP142]], ptr [[TMP141]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue50:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP143]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP144]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP144]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i8> [[TMP134]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP146]], i8* [[TMP145]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP146]], ptr [[TMP145]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue52:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP147]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP148]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP148]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i8> [[TMP134]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP150]], i8* [[TMP149]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP150]], ptr [[TMP149]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue54:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP151]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP152]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP152]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i8> [[TMP134]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP154]], i8* [[TMP153]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP154]], ptr [[TMP153]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue56:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP155]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP156]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP156]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i8> [[TMP134]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP158]], i8* [[TMP157]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP158]], ptr [[TMP157]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue58:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP159]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP160]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP160]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i8> [[TMP134]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP162]], i8* [[TMP161]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP162]], ptr [[TMP161]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue60:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP163]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if61:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP164]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP164]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP166:%.*]] = extractelement <8 x i8> [[TMP134]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP166]], i8* [[TMP165]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP166]], ptr [[TMP165]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE62]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue62:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
@@ -2631,21 +2611,19 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl i32 [[INDEX]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[P:%.*]], i32 [[TMP2]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP2]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = or i32 [[TMP2]], 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC3]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = sub <8 x i8> zeroinitializer, [[TMP7]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 -1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[TMP9]], i32 [[TMP6]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <16 x i8>*
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 -1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 [[TMP6]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP8]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP11]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
+; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -2667,18 +2645,18 @@ for.body:
 
 if.then:
   %mul = shl nuw nsw i32 %ix.023, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
   %add = or i32 %mul, 1
-  %arrayidx3 = getelementptr inbounds i8, i8* %p, i32 %add
-  %1 = load i8, i8* %arrayidx3, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %p, i32 %add
+  %1 = load i8, ptr %arrayidx3, align 1
   %cmp.i = icmp slt i8 %0, %1
   %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0
-  %arrayidx5 = getelementptr inbounds i8, i8* %q, i32 %mul
-  store i8 %spec.select.i, i8* %arrayidx5, align 1
+  %arrayidx5 = getelementptr inbounds i8, ptr %q, i32 %mul
+  store i8 %spec.select.i, ptr %arrayidx5, align 1
   %sub = sub i8 0, %spec.select.i
-  %arrayidx9 = getelementptr inbounds i8, i8* %q, i32 %add
-  store i8 %sub, i8* %arrayidx9, align 1
+  %arrayidx9 = getelementptr inbounds i8, ptr %q, i32 %add
+  store i8 %sub, ptr %arrayidx9, align 1
   br label %for.inc
 
 for.inc:
@@ -2719,7 +2697,7 @@ for.end:
 ; }
 ;}
 ;
-define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %n) local_unnamed_addr optsize {
+define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %n) local_unnamed_addr optsize {
 ; DISABLED_MASKED_STRIDED-LABEL: @unconditional_masked_strided2_unknown_tc(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[CMP20:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -2740,8 +2718,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue:
@@ -2750,8 +2728,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue2:
@@ -2760,8 +2738,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue4:
@@ -2770,8 +2748,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue6:
@@ -2780,8 +2758,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue8:
@@ -2790,8 +2768,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue10:
@@ -2800,8 +2778,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue12:
@@ -2810,8 +2788,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue14:
@@ -2821,8 +2799,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP52]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = load i8, i8* [[TMP53]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP52]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = insertelement <8 x i8> poison, i8 [[TMP54]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue16:
@@ -2831,8 +2809,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if17:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP58]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = load i8, i8* [[TMP59]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP58]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = load i8, ptr [[TMP59]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue18:
@@ -2841,8 +2819,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if19:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP64]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = load i8, i8* [[TMP65]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP64]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = load i8, ptr [[TMP65]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue20:
@@ -2851,8 +2829,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if21:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP70]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = load i8, i8* [[TMP71]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP70]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue22:
@@ -2861,8 +2839,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if23:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP76]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = load i8, i8* [[TMP77]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP76]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = load i8, ptr [[TMP77]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue24:
@@ -2871,8 +2849,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if25:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP82]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = load i8, i8* [[TMP83]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP82]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue26:
@@ -2881,8 +2859,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if27:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP88]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = load i8, i8* [[TMP89]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP88]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue28:
@@ -2891,8 +2869,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if29:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP94]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = load i8, i8* [[TMP95]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP94]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = load i8, ptr [[TMP95]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue30:
@@ -2902,72 +2880,72 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP100]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP101]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP101]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP103:%.*]] = extractelement <8 x i8> [[TMP99]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP103]], i8* [[TMP102]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP103]], ptr [[TMP102]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP104:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP104]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if31:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP105:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP105]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP105]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = extractelement <8 x i8> [[TMP99]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP107]], i8* [[TMP106]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP107]], ptr [[TMP106]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE32]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue32:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP108:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP108]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if33:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP109]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP109]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = extractelement <8 x i8> [[TMP99]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP111]], i8* [[TMP110]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP111]], ptr [[TMP110]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE34]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue34:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP112:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP112]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if35:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP113]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP113]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = extractelement <8 x i8> [[TMP99]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP115]], i8* [[TMP114]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP115]], ptr [[TMP114]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE36]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue36:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP116:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP116]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if37:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP117]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP117]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = extractelement <8 x i8> [[TMP99]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP119]], i8* [[TMP118]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP119]], ptr [[TMP118]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE38]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue38:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP120:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP120]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if39:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP121]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP121]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = extractelement <8 x i8> [[TMP99]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP123]], i8* [[TMP122]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP123]], ptr [[TMP122]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE40]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue40:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP124:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP124]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if41:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP125]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP125]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = extractelement <8 x i8> [[TMP99]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP127]], i8* [[TMP126]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP127]], ptr [[TMP126]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE42]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue42:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP128:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP128]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if43:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP129]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP129]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = extractelement <8 x i8> [[TMP99]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP131]], i8* [[TMP130]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP131]], ptr [[TMP130]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue44:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
@@ -2975,72 +2953,72 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if45:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP134]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP134]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i8> [[TMP132]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], i8* [[TMP135]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue46:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP138]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP138]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = extractelement <8 x i8> [[TMP132]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], i8* [[TMP139]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], ptr [[TMP139]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue48:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP142]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP142]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], i8* [[TMP143]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], ptr [[TMP143]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue50:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP146]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP146]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], i8* [[TMP147]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], ptr [[TMP147]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue52:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP150]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP150]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], i8* [[TMP151]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], ptr [[TMP151]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue54:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP154]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP154]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], i8* [[TMP155]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], ptr [[TMP155]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue56:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP158]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP158]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], i8* [[TMP159]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], ptr [[TMP159]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue58:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP162]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP162]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP164]], i8* [[TMP163]], align 1
+; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP164]], ptr [[TMP163]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue60:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
@@ -3068,20 +3046,18 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IV:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP1]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP2]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC3]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = sub <8 x i8> zeroinitializer, [[TMP5]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 -1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[TMP7]], i32 [[TMP4]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <16 x i8>*
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 -1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 [[TMP4]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP9]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
+; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP8]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP10]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
@@ -3098,18 +3074,18 @@ for.body.preheader:
 for.body:
   %ix.021 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %mul = shl nuw nsw i32 %ix.021, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
   %add = or i32 %mul, 1
-  %arrayidx2 = getelementptr inbounds i8, i8* %p, i32 %add
-  %1 = load i8, i8* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %p, i32 %add
+  %1 = load i8, ptr %arrayidx2, align 1
   %cmp.i = icmp slt i8 %0, %1
   %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0
-  %arrayidx4 = getelementptr inbounds i8, i8* %q, i32 %mul
-  store i8 %spec.select.i, i8* %arrayidx4, align 1
+  %arrayidx4 = getelementptr inbounds i8, ptr %q, i32 %mul
+  store i8 %spec.select.i, ptr %arrayidx4, align 1
   %sub = sub i8 0, %spec.select.i
-  %arrayidx8 = getelementptr inbounds i8, i8* %q, i32 %add
-  store i8 %sub, i8* %arrayidx8, align 1
+  %arrayidx8 = getelementptr inbounds i8, ptr %q, i32 %add
+  store i8 %sub, ptr %arrayidx8, align 1
   %inc = add nuw nsw i32 %ix.021, 1
   %exitcond = icmp eq i32 %inc, %n
   br i1 %exitcond, label %for.end.loopexit, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
index 8525bf10ae7da..16f85df40f1cd 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
@@ -18,53 +18,51 @@ target triple = "x86_64-unknown-linux-gnu"
 ; (relates to the testcase in PR50566)
 
 ; Function Attrs: nofree norecurse nosync nounwind uwtable
-define dso_local void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) local_unnamed_addr {
+define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y) local_unnamed_addr {
 ; DISABLED_MASKED_STRIDED-LABEL: @test1(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; DISABLED_MASKED_STRIDED:       vector.body:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>*
-; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[POINTS:%.*]], i64 [[TMP3]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP5]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP5]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP7]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP7]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP9]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP11]], i16* [[TMP4]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP11]], ptr [[TMP4]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP12]], i16* [[TMP6]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP12]], ptr [[TMP6]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP13]], i16* [[TMP8]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP13]], ptr [[TMP8]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP14]], i16* [[TMP10]], align 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[INDEX]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = bitcast i16* [[TMP15]] to <4 x i16>*
-; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP16]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP14]], ptr [[TMP10]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP15]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = or <4 x i64> [[TMP2]], <i64 1, i64 1, i64 1, i64 1>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = extractelement <4 x i64> [[TMP17]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP18]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP18]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = extractelement <4 x i64> [[TMP17]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP20]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP20]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = extractelement <4 x i64> [[TMP17]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP22]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP22]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = extractelement <4 x i64> [[TMP17]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP24]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP24]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP26]], i16* [[TMP19]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP26]], ptr [[TMP19]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP27]], i16* [[TMP21]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP27]], ptr [[TMP21]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP28]], i16* [[TMP23]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP28]], ptr [[TMP23]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP29]], i16* [[TMP25]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP29]], ptr [[TMP25]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
@@ -74,22 +72,19 @@ define dso_local void @test1(i16* noalias nocapture %points, i16* noalias nocapt
 ;
 ; ENABLED_MASKED_STRIDED-LABEL: @test1(
 ; ENABLED_MASKED_STRIDED-NEXT:  entry:
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr i16, i16* [[POINTS:%.*]], i64 -1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr i16, ptr [[POINTS:%.*]], i64 -1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; ENABLED_MASKED_STRIDED:       vector.body:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>*
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[INDEX]], 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[INDEX]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP4]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr i16, i16* [[TMP0]], i64 [[TMP6]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <16 x i16>*
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr i16, ptr [[TMP0]], i64 [[TMP6]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <16 x i32> <i32 0, i32 4, i32 undef, i32 undef, i32 1, i32 5, i32 undef, i32 undef, i32 2, i32 6, i32 undef, i32 undef, i32 3, i32 7, i32 undef, i32 undef>
-; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> [[INTERLEAVED_VEC]], <16 x i16>* [[TMP8]], i32 2, <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false>)
+; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i16.p0(<16 x i16> [[INTERLEAVED_VEC]], ptr [[TMP7]], i32 2, <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false>)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP9]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -101,16 +96,16 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
+  %0 = load i16, ptr %arrayidx, align 2
   %1 = shl nuw nsw i64 %indvars.iv, 2
-  %arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1
-  store i16 %0, i16* %arrayidx2, align 2
-  %arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv
-  %2 = load i16, i16* %arrayidx4, align 2
+  %arrayidx2 = getelementptr inbounds i16, ptr %points, i64 %1
+  store i16 %0, ptr %arrayidx2, align 2
+  %arrayidx4 = getelementptr inbounds i16, ptr %y, i64 %indvars.iv
+  %2 = load i16, ptr %arrayidx4, align 2
   %3 = or i64 %1, 1
-  %arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %3
-  store i16 %2, i16* %arrayidx7, align 2
+  %arrayidx7 = getelementptr inbounds i16, ptr %points, i64 %3
+  store i16 %2, ptr %arrayidx7, align 2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
   br i1 %exitcond.not, label %for.end, label %for.body
@@ -129,7 +124,7 @@ for.end:
 ;     }
 
 ; Function Attrs: nofree norecurse nosync nounwind uwtable
-define dso_local void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) local_unnamed_addr {
+define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y) local_unnamed_addr {
 ; DISABLED_MASKED_STRIDED-LABEL: @test2(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[NUMPOINTS:%.*]], 0
@@ -146,84 +141,82 @@ define dso_local void @test2(i16* noalias nocapture %points, i32 %numPoints, i16
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE15]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>*
-; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP2]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP1]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = shl nsw <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 [[TMP5]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[POINTS:%.*]], i64 [[TMP5]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP7]], i16* [[TMP6]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP7]], ptr [[TMP6]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP9]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP11]], i16* [[TMP10]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP11]], ptr [[TMP10]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue2:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP13]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP13]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP15]], i16* [[TMP14]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP15]], ptr [[TMP14]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue4:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP17]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP17]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP19]], i16* [[TMP18]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP19]], ptr [[TMP18]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue6:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[INDEX]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = bitcast i16* [[TMP20]] to <4 x i16>*
-; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP21]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP20]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = or <4 x i64> [[TMP3]], <i64 1, i64 1, i64 1, i64 1>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP23]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if8:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP24]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP24]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP26]], i16* [[TMP25]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP26]], ptr [[TMP25]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE9]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP27]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if10:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = extractelement <4 x i64> [[TMP22]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP28]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP28]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP30]], i16* [[TMP29]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP30]], ptr [[TMP29]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE11]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP31]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if12:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = extractelement <4 x i64> [[TMP22]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP32]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP32]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP34]], i16* [[TMP33]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP34]], ptr [[TMP33]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE13]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP35]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if14:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = extractelement <4 x i64> [[TMP22]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP36]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP36]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP38]], i16* [[TMP37]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP38]], ptr [[TMP37]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE15]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
@@ -246,7 +239,7 @@ define dso_local void @test2(i16* noalias nocapture %points, i32 %numPoints, i16
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr i16, i16* [[POINTS:%.*]], i64 -1
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr i16, ptr [[POINTS:%.*]], i64 -1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; ENABLED_MASKED_STRIDED:       vector.body:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -254,20 +247,17 @@ define dso_local void @test2(i16* noalias nocapture %points, i32 %numPoints, i16
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <4 x i16>*
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP3]], i32 2, <4 x i1> [[TMP1]], <4 x i16> poison)
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP2]], i32 2, <4 x i1> [[TMP1]], <4 x i16> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = shl nsw i64 [[INDEX]], 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[INDEX]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <4 x i16>*
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP6]], i32 2, <4 x i1> [[TMP1]], <4 x i16> poison)
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP5]], i32 2, <4 x i1> [[TMP1]], <4 x i16> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = or i64 [[TMP4]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr i16, i16* [[TMP0]], i64 [[TMP7]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <16 x i16>*
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr i16, ptr [[TMP0]], i64 [[TMP7]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_MASKED_LOAD]], <4 x i16> [[WIDE_MASKED_LOAD3]], <16 x i32> <i32 0, i32 4, i32 undef, i32 undef, i32 1, i32 5, i32 undef, i32 undef, i32 2, i32 6, i32 undef, i32 undef, i32 3, i32 7, i32 undef, i32 undef>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false>
-; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> [[INTERLEAVED_VEC]], <16 x i16>* [[TMP9]], i32 2, <16 x i1> [[TMP10]])
+; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i16.p0(<16 x i16> [[INTERLEAVED_VEC]], ptr [[TMP8]], i32 2, <16 x i1> [[TMP10]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP11]], label [[FOR_END_LOOPEXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -286,16 +276,16 @@ for.body.preheader:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
+  %0 = load i16, ptr %arrayidx, align 2
   %1 = shl nsw i64 %indvars.iv, 2
-  %arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1
-  store i16 %0, i16* %arrayidx2, align 2
-  %arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv
-  %2 = load i16, i16* %arrayidx4, align 2
+  %arrayidx2 = getelementptr inbounds i16, ptr %points, i64 %1
+  store i16 %0, ptr %arrayidx2, align 2
+  %arrayidx4 = getelementptr inbounds i16, ptr %y, i64 %indvars.iv
+  %2 = load i16, ptr %arrayidx4, align 2
   %3 = or i64 %1, 1
-  %arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %3
-  store i16 %2, i16* %arrayidx7, align 2
+  %arrayidx7 = getelementptr inbounds i16, ptr %points, i64 %3
+  store i16 %2, ptr %arrayidx7, align 2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %for.end.loopexit, label %for.body
@@ -318,52 +308,51 @@ for.end:
 ;         points[i*3] = x[i];
 ;     }
 ; Function Attrs: nofree norecurse nosync nounwind uwtable
-define dso_local void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) local_unnamed_addr {
+define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture readonly %x, ptr noalias nocapture readnone %y) local_unnamed_addr {
 ; DISABLED_MASKED_STRIDED-LABEL: @test(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; DISABLED_MASKED_STRIDED:       vector.body:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>*
-; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 [[TMP5]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[POINTS:%.*]], i64 [[TMP5]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP7]], i16* [[TMP6]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP7]], ptr [[TMP6]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP9]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP11]], i16* [[TMP10]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP11]], ptr [[TMP10]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue2:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP13]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP13]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP15]], i16* [[TMP14]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP15]], ptr [[TMP14]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue4:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP17]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP17]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP19]], i16* [[TMP18]], align 2
+; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP19]], ptr [[TMP18]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue6:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -379,45 +368,44 @@ define dso_local void @test(i16* noalias nocapture %points, i16* noalias nocaptu
 ; ENABLED_MASKED_STRIDED:       vector.body:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>*
-; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 [[TMP5]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[POINTS:%.*]], i64 [[TMP5]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP7]], i16* [[TMP6]], align 2
+; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP7]], ptr [[TMP6]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if1:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP9]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP9]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP11]], i16* [[TMP10]], align 2
+; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP11]], ptr [[TMP10]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue2:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if3:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP13]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP13]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP15]], i16* [[TMP14]], align 2
+; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP15]], ptr [[TMP14]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue4:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if5:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP17]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP17]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP19]], i16* [[TMP18]], align 2
+; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP19]], ptr [[TMP18]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue6:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -432,15 +420,15 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
-  %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
+  %0 = load i16, ptr %arrayidx, align 2
   %cmp1 = icmp sgt i16 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:
   %1 = mul nuw nsw i64 %indvars.iv, 3
-  %arrayidx6 = getelementptr inbounds i16, i16* %points, i64 %1
-  store i16 %0, i16* %arrayidx6, align 2
+  %arrayidx6 = getelementptr inbounds i16, ptr %points, i64 %1
+  store i16 %0, ptr %arrayidx6, align 2
   br label %for.inc
 
 for.inc:

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll
index 73879b5a87414..6ed398bb5bea5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll
@@ -29,9 +29,9 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 ;CHECK-NEXT:  %{{.*}} = shl nuw nsw <8 x i32> %[[VECIND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ;CHECK-NEXT:  %[[M:.+]] = extractelement <8 x i1> %[[VMASK]], i32 0
 ;CHECK-NEXT:  br i1 %[[M]], label %pred.store.if, label %pred.store.continue
-;CHECK-NOT:   %{{.+}} = load <16 x i8>, <16 x i8>* %{{.*}}, align 1
+;CHECK-NOT:   %{{.+}} = load <16 x i8>, ptr %{{.*}}, align 1
 
-define dso_local void @masked_strided(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {
+define dso_local void @masked_strided(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {
 entry:
   %conv = zext i8 %guard to i32
   br label %for.body
@@ -43,10 +43,10 @@ for.body:
 
 if.then:
   %mul = shl nuw nsw i32 %ix.09, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.09
-  store i8 %0, i8* %arrayidx3, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %q, i32 %ix.09
+  store i8 %0, ptr %arrayidx3, align 1
   br label %for.inc
 
 for.inc:

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-interleaved-access.ll b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-interleaved-access.ll
index 266e4b4fe8de5..368361fd760ec 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-interleaved-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-interleaved-access.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 ; Verify that we do not get any loads of vectors with x86_fp80 elements.
 ;
 ; CHECK-NOT: load {{.*}} x x86_fp80
-define x86_fp80 @foo(x86_fp80* %a) {
+define x86_fp80 @foo(ptr %a) {
 entry:
   br label %for.body
 
@@ -16,11 +16,11 @@ for.cond.cleanup:
 for.body:
   %i.09 = phi i16 [ 0, %entry ], [ %add3, %for.body ]
   %res.08 = phi x86_fp80 [ undef, %entry ], [ %3, %for.body ]
-  %arrayidx = getelementptr inbounds x86_fp80, x86_fp80* %a, i16 %i.09
-  %0 = load x86_fp80, x86_fp80* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds x86_fp80, ptr %a, i16 %i.09
+  %0 = load x86_fp80, ptr %arrayidx, align 1
   %add = or i16 %i.09, 1
-  %arrayidx2 = getelementptr inbounds x86_fp80, x86_fp80* %a, i16 %add
-  %1 = load x86_fp80, x86_fp80* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds x86_fp80, ptr %a, i16 %add
+  %1 = load x86_fp80, ptr %arrayidx2, align 1
   %2 = fadd fast x86_fp80 %0, %res.08
   %3 = fadd fast x86_fp80 %2, %1
   %add3 = add nuw nsw i16 %i.09, 2

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
index b040c920d4c7f..c424fff3bb3c7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
@@ -14,10 +14,10 @@ define void @example() nounwind ssp uwtable {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1024 x x86_fp80], [1024 x x86_fp80]* @x, i64 0, i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1024 x x86_fp80], [1024 x x86_fp80]* @x, i64 0, i64 [[TMP0]]
-; CHECK-NEXT:    store x86_fp80 0xK3FFF8000000000000000, x86_fp80* [[TMP1]], align 16
-; CHECK-NEXT:    store x86_fp80 0xK3FFF8000000000000000, x86_fp80* [[TMP2]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    store x86_fp80 0xK3FFF8000000000000000, ptr [[TMP1]], align 16
+; CHECK-NEXT:    store x86_fp80 0xK3FFF8000000000000000, ptr [[TMP2]], align 16
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -30,8 +30,8 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %conv = sitofp i32 1 to x86_fp80
-  %arrayidx = getelementptr inbounds [1024 x x86_fp80], [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv
-  store x86_fp80 %conv, x86_fp80* %arrayidx, align 16
+  %arrayidx = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 %indvars.iv
+  store x86_fp80 %conv, ptr %arrayidx, align 16
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024

diff  --git a/llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll b/llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll
index 855bba2138100..00637551fe1bc 100644
--- a/llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll
+++ b/llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll
@@ -5,7 +5,7 @@
 
 ; Alias set with uncomputable bounds contains a single load. We do not need
 ; runtime checks for that group and it should not block vectorization.
-define void @test1_uncomputable_bounds_single_load(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) {
+define void @test1_uncomputable_bounds_single_load(ptr noalias %ptr.1, ptr noalias %ptr.2, ptr noalias %ptr.3, i64 %N, i64 %X) {
 ; CHECK-LABEL: define void @test1_uncomputable_bounds_single_load
 ; CHECK:       vector.body
 ; CHECK:         ret void
@@ -19,15 +19,15 @@ ph:
 
 loop:
   %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
-  %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv
-  %offset.1 = load i32, i32* %gep.1, align 4
-  %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1
-  %lv = load i32, i32* %gep.2, align 4
-  %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv
-  store i32 %lv , i32* %gep.3, align 4
+  %gep.1 = getelementptr inbounds i32, ptr %ptr.3, i64 %iv
+  %offset.1 = load i32, ptr %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, ptr %ptr.2, i32 %offset.1
+  %lv = load i32, ptr %gep.2, align 4
+  %gep.3 = getelementptr inbounds i32, ptr %ptr.1, i64 %iv
+  store i32 %lv , ptr %gep.3, align 4
   %offset.2 = add nsw i64 %iv, %X
-  %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2
-  store i32 %lv, i32* %gep.4, align 4
+  %gep.4 = getelementptr inbounds i32, ptr %ptr.1, i64 %offset.2
+  store i32 %lv, ptr %gep.4, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %N
   br i1 %exitcond, label %loop.exit, label %loop
@@ -41,7 +41,7 @@ exit:
 
 ; Alias set with uncomputable bounds contains a single store. We do not need
 ; runtime checks for that group and it should not block vectorization.
-define void @test2_uncomputable_bounds_single_store(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) {
+define void @test2_uncomputable_bounds_single_store(ptr noalias %ptr.1, ptr noalias %ptr.2, ptr noalias %ptr.3, i64 %N, i64 %X) {
 ; CHECK-LABEL: define void @test2_uncomputable_bounds_single_store
 ; CHECK:       vector.body
 ; CHECK:         ret void
@@ -55,15 +55,15 @@ ph:
 
 loop:
   %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
-  %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv
-  %offset.1 = load i32, i32* %gep.1, align 4
-  %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1
-  store i32 20, i32* %gep.2, align 4
-  %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv
-  store i32 0 , i32* %gep.3, align 4
+  %gep.1 = getelementptr inbounds i32, ptr %ptr.3, i64 %iv
+  %offset.1 = load i32, ptr %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, ptr %ptr.2, i32 %offset.1
+  store i32 20, ptr %gep.2, align 4
+  %gep.3 = getelementptr inbounds i32, ptr %ptr.1, i64 %iv
+  store i32 0 , ptr %gep.3, align 4
   %offset.2 = add nsw i64 %iv, %X
-  %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2
-  store i32 10, i32* %gep.4, align 4
+  %gep.4 = getelementptr inbounds i32, ptr %ptr.1, i64 %offset.2
+  store i32 10, ptr %gep.4, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %N
   br i1 %exitcond, label %loop.exit, label %loop
@@ -77,7 +77,7 @@ exit:
 
 ; Alias set with uncomputable bounds contains a load and a store. This blocks
 ; vectorization, as we cannot generate runtime-checks for the set.
-define void @test3_uncomputable_bounds_load_store(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) {
+define void @test3_uncomputable_bounds_load_store(ptr noalias %ptr.1, ptr noalias %ptr.2, ptr noalias %ptr.3, i64 %N, i64 %X) {
 ; CHECK-LABEL: define void @test3_uncomputable_bounds_load_store
 ; CHECK-NOT: vector.body
 
@@ -90,17 +90,17 @@ ph:
 
 loop:
   %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
-  %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv
-  %offset.1 = load i32, i32* %gep.1, align 4
-  %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1
-  store i32 20, i32* %gep.2, align 4
-  %gep.22 = getelementptr inbounds i32, i32* %ptr.2, i64 %iv
-  %lv = load i32, i32* %gep.22, align 4
-  %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv
-  store i32 %lv , i32* %gep.3, align 4
+  %gep.1 = getelementptr inbounds i32, ptr %ptr.3, i64 %iv
+  %offset.1 = load i32, ptr %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, ptr %ptr.2, i32 %offset.1
+  store i32 20, ptr %gep.2, align 4
+  %gep.22 = getelementptr inbounds i32, ptr %ptr.2, i64 %iv
+  %lv = load i32, ptr %gep.22, align 4
+  %gep.3 = getelementptr inbounds i32, ptr %ptr.1, i64 %iv
+  store i32 %lv , ptr %gep.3, align 4
   %offset.2 = add nsw i64 %iv, %X
-  %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2
-  store i32 %lv, i32* %gep.4, align 4
+  %gep.4 = getelementptr inbounds i32, ptr %ptr.1, i64 %offset.2
+  store i32 %lv, ptr %gep.4, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %N
   br i1 %exitcond, label %loop.exit, label %loop
@@ -114,7 +114,7 @@ exit:
 
 ; Alias set with uncomputable bounds contains a load and a store. This blocks
 ; vectorization, as we cannot generate runtime-checks for the set.
-define void @test4_uncomputable_bounds_store_store(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) {
+define void @test4_uncomputable_bounds_store_store(ptr noalias %ptr.1, ptr noalias %ptr.2, ptr noalias %ptr.3, i64 %N, i64 %X) {
 ; CHECK-LABEL: define void @test4_uncomputable_bounds_store_store
 ; CHECK-NOT: vector.body
 
@@ -127,17 +127,17 @@ ph:
 
 loop:
   %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
-  %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv
-  %offset.1 = load i32, i32* %gep.1, align 4
-  %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1
-  store i32 20, i32* %gep.2, align 4
-  %gep.22 = getelementptr inbounds i32, i32* %ptr.2, i64 %iv
-  store i32 30, i32* %gep.22, align 4
-  %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv
-  store i32 0 , i32* %gep.3, align 4
+  %gep.1 = getelementptr inbounds i32, ptr %ptr.3, i64 %iv
+  %offset.1 = load i32, ptr %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, ptr %ptr.2, i32 %offset.1
+  store i32 20, ptr %gep.2, align 4
+  %gep.22 = getelementptr inbounds i32, ptr %ptr.2, i64 %iv
+  store i32 30, ptr %gep.22, align 4
+  %gep.3 = getelementptr inbounds i32, ptr %ptr.1, i64 %iv
+  store i32 0 , ptr %gep.3, align 4
   %offset.2 = add nsw i64 %iv, %X
-  %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2
-  store i32 10, i32* %gep.4, align 4
+  %gep.4 = getelementptr inbounds i32, ptr %ptr.1, i64 %offset.2
+  store i32 10, ptr %gep.4, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %N
   br i1 %exitcond, label %loop.exit, label %loop

diff  --git a/llvm/test/Transforms/LoopVectorize/align.ll b/llvm/test/Transforms/LoopVectorize/align.ll
index e75a4dd8d2a1c..a55f377448614 100644
--- a/llvm/test/Transforms/LoopVectorize/align.ll
+++ b/llvm/test/Transforms/LoopVectorize/align.ll
@@ -5,22 +5,22 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Make sure we output the abi alignment if no alignment is specified.
 
 ;CHECK-LABEL: @align
-;CHECK: load <4 x i32>, <4 x i32>* {{.*}} align  4
-;CHECK: load <4 x i32>, <4 x i32>* {{.*}} align  4
+;CHECK: load <4 x i32>, ptr {{.*}} align  4
+;CHECK: load <4 x i32>, ptr {{.*}} align  4
 ;CHECK: store <4 x i32> {{.*}} align  4
 
-define void @align(i32* %a, i32* %b, i32* %c) nounwind uwtable ssp {
+define void @align(ptr %a, ptr %b, ptr %c) nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %3 = load i32, i32* %2
-  %4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
-  %5 = load i32, i32* %4
+  %2 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %3 = load i32, ptr %2
+  %4 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
+  %5 = load i32, ptr %4
   %6 = add nsw i32 %5, %3
-  %7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 %6, i32* %7
+  %7 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %6, ptr %7
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 128

diff  --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll
index 6070fb1675282..0ca3e05d77c04 100644
--- a/llvm/test/Transforms/LoopVectorize/assume.ll
+++ b/llvm/test/Transforms/LoopVectorize/assume.ll
@@ -1,10 +1,10 @@
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=2  -S | FileCheck %s
 
-define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) {
+define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
 ; CHECK-LABEL: @test1(
 ; CHECK:       vector.body:
-; CHECK:         [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* {{.*}}, align 4
-; CHECK:         [[WIDE_LOAD1:%.*]] = load <2 x float>, <2 x float>* {{.*}}, align 4
+; CHECK:         [[WIDE_LOAD:%.*]] = load <2 x float>, ptr {{.*}}, align 4
+; CHECK:         [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr {{.*}}, align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], <float 1.000000e+02, float 1.000000e+02>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD1]], <float 1.000000e+02, float 1.000000e+02>
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
@@ -20,13 +20,13 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp1 = fcmp ogt float %0, 1.000000e+02
   tail call void @llvm.assume(i1 %cmp1)
   %add = fadd float %0, 1.000000e+00
-  %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv, 1599
   br i1 %exitcond, label %for.end, label %for.body
@@ -39,9 +39,9 @@ declare void @llvm.assume(i1) #0
 
 attributes #0 = { nounwind willreturn }
 
-%struct.data = type { float*, float* }
+%struct.data = type { ptr, ptr }
 
-define void @test2(%struct.data* nocapture readonly %d) {
+define void @test2(ptr nocapture readonly %d) {
 ; CHECK-LABEL: @test2(
 ; CHECK:       entry:
 ; CHECK:         [[MASKCOND:%.*]] = icmp eq i64 %maskedptr, 0
@@ -53,14 +53,13 @@ define void @test2(%struct.data* nocapture readonly %d) {
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[MASKCOND4]])
 ; CHECK:       for.body:
 entry:
-  %b = getelementptr inbounds %struct.data, %struct.data* %d, i64 0, i32 1
-  %0 = load float*, float** %b, align 8
-  %ptrint = ptrtoint float* %0 to i64
+  %b = getelementptr inbounds %struct.data, ptr %d, i64 0, i32 1
+  %0 = load ptr, ptr %b, align 8
+  %ptrint = ptrtoint ptr %0 to i64
   %maskedptr = and i64 %ptrint, 31
   %maskcond = icmp eq i64 %maskedptr, 0
-  %a = getelementptr inbounds %struct.data, %struct.data* %d, i64 0, i32 0
-  %1 = load float*, float** %a, align 8
-  %ptrint2 = ptrtoint float* %1 to i64
+  %1 = load ptr, ptr %d, align 8
+  %ptrint2 = ptrtoint ptr %1 to i64
   %maskedptr3 = and i64 %ptrint2, 31
   %maskcond4 = icmp eq i64 %maskedptr3, 0
   br label %for.body
@@ -69,12 +68,12 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   tail call void @llvm.assume(i1 %maskcond)
-  %arrayidx = getelementptr inbounds float, float* %0, i64 %indvars.iv
-  %2 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %0, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
   %add = fadd float %2, 1.000000e+00
   tail call void @llvm.assume(i1 %maskcond4)
-  %arrayidx5 = getelementptr inbounds float, float* %1, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %1, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv, 1599
   br i1 %exitcond, label %for.end, label %for.body
@@ -86,7 +85,7 @@ for.end:                                          ; preds = %for.body
 ; Test case for PR43620. Make sure we can vectorize with predication in presence
 ; of assume calls. For now, check that we drop all assumes in predicated blocks
 ; in the vector body.
-define void @predicated_assume(float* noalias nocapture readonly %a, float* noalias nocapture %b, i32 %n) {
+define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i32 %n) {
 ; Check that the vector.body does not contain any assumes.
 ; CHECK-LABEL: @predicated_assume(
 ; CHECK:       vector.body:
@@ -118,11 +117,11 @@ if.else:                                          ; preds = %for.body
 
 if.end5:                                          ; preds = %for.body, %if.else
   %x.0 = phi float [ 4.200000e+01, %if.else ], [ 2.300000e+01, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx, align 4
   %mul = fmul float %x.0, %1
-  %arrayidx7 = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  store float %mul, float* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  store float %mul, ptr %arrayidx7, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %cmp = icmp eq i64 %indvars.iv.next, %0
   br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll
index 9a050b004b966..f2111081ffca9 100644
--- a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll
+++ b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll
@@ -8,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; When scalarizing stores we need to preserve the original order.
 ; Make sure that we are extracting in the correct order (0101, and not 0011).
 
-define i32 @foo(i32* nocapture %A) {
+define i32 @foo(ptr nocapture %A) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -21,17 +21,17 @@ define i32 @foo(i32* nocapture %A) {
 ; CHECK-NEXT:    [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], <i64 2, i64 2>
 ; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[STEP_ADD]], <i64 8, i64 8>
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP1]], i64 1
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
-; CHECK-NEXT:    store i32 4, i32* [[TMP3]], align 4
-; CHECK-NEXT:    store i32 4, i32* [[TMP5]], align 4
-; CHECK-NEXT:    store i32 4, i32* [[TMP7]], align 4
-; CHECK-NEXT:    store i32 4, i32* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]]
+; CHECK-NEXT:    store i32 4, ptr [[TMP3]], align 4
+; CHECK-NEXT:    store i32 4, ptr [[TMP5]], align 4
+; CHECK-NEXT:    store i32 4, ptr [[TMP7]], align 4
+; CHECK-NEXT:    store i32 4, ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
@@ -51,8 +51,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = shl nsw i64 %indvars.iv, 2
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
-  store i32 4, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %0
+  store i32 4, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 10000

diff  --git a/llvm/test/Transforms/LoopVectorize/calloc.ll b/llvm/test/Transforms/LoopVectorize/calloc.ll
index 2f3446fb30045..67ea79fc494a1 100644
--- a/llvm/test/Transforms/LoopVectorize/calloc.ll
+++ b/llvm/test/Transforms/LoopVectorize/calloc.ll
@@ -6,11 +6,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK: zext <4 x i8>
 ;CHECK: ret
 
-define noalias i8* @hexit(i8* nocapture %bytes, i64 %length) nounwind uwtable ssp {
+define noalias ptr @hexit(ptr nocapture %bytes, i64 %length) nounwind uwtable ssp {
 entry:
   %shl = shl i64 %length, 1
   %add28 = or i64 %shl, 1
-  %call = tail call i8* @calloc(i64 1, i64 %add28) nounwind
+  %call = tail call ptr @calloc(i64 1, i64 %add28) nounwind
   %cmp29 = icmp eq i64 %shl, 0
   br i1 %cmp29, label %for.end, label %for.body.lr.ph
 
@@ -21,8 +21,8 @@ for.body.lr.ph:                                   ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %i.030 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %shr = lshr i64 %i.030, 1
-  %arrayidx = getelementptr inbounds i8, i8* %bytes, i64 %shr
-  %1 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %bytes, i64 %shr
+  %1 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %1 to i32
   %and = shl i64 %i.030, 2
   %neg = and i64 %and, 4
@@ -36,14 +36,14 @@ for.body:                                         ; preds = %for.body, %for.body
   %cond = select i1 %cmp15, i32 87, i32 48
   %add17 = add nsw i32 %cond, %shr11
   %conv18 = trunc i32 %add17 to i8
-  %arrayidx19 = getelementptr inbounds i8, i8* %call, i64 %i.030
-  store i8 %conv18, i8* %arrayidx19, align 1
+  %arrayidx19 = getelementptr inbounds i8, ptr %call, i64 %i.030
+  store i8 %conv18, ptr %arrayidx19, align 1
   %inc = add i64 %i.030, 1
   %exitcond = icmp eq i64 %inc, %0
   br i1 %exitcond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body, %entry
-  ret i8* %call
+  ret ptr %call
 }
 
-declare noalias i8* @calloc(i64, i64) nounwind
+declare noalias ptr @calloc(i64, i64) nounwind

diff  --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
index 7b9ba15d59333..a4433bc7f00d4 100644
--- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
@@ -28,9 +28,9 @@ entry:
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %gep = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %iv
+  %gep = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %iv
   %iv.trunc = trunc i64 %iv to i32
-  store i32 %iv.trunc, i32* %gep, align 4
+  store i32 %iv.trunc, ptr %gep, align 4
   %iv.next = add i64 %iv, 1
   %iv.next.trunc = trunc i64 %iv.next to i32
   %exitcond = icmp eq i32 %iv.next.trunc, 1024

diff  --git a/llvm/test/Transforms/LoopVectorize/check-prof-info.ll b/llvm/test/Transforms/LoopVectorize/check-prof-info.ll
index 78497f06def9c..17013c5908065 100644
--- a/llvm/test/Transforms/LoopVectorize/check-prof-info.ll
+++ b/llvm/test/Transforms/LoopVectorize/check-prof-info.ll
@@ -28,14 +28,14 @@ for.cond.cleanup:                                 ; preds = %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @b, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4, !tbaa !2
   %1 = trunc i64 %indvars.iv to i32
   %mul = mul nsw i32 %0, %1
-  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @a, i64 0, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx2, align 4, !tbaa !2
+  %arrayidx2 = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx2, align 4, !tbaa !2
   %add = add nsw i32 %2, %mul
-  store i32 %add, i32* %arrayidx2, align 4, !tbaa !2
+  store i32 %add, ptr %arrayidx2, align 4, !tbaa !2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1024
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !6
@@ -62,14 +62,14 @@ for.cond.cleanup:                                 ; preds = %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @b, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4, !tbaa !2
   %1 = trunc i64 %indvars.iv to i32
   %mul = mul nsw i32 %0, %1
-  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @a, i64 0, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx2, align 4, !tbaa !2
+  %arrayidx2 = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx2, align 4, !tbaa !2
   %add = add nsw i32 %2, %mul
-  store i32 %add, i32* %arrayidx2, align 4, !tbaa !2
+  store i32 %add, ptr %arrayidx2, align 4, !tbaa !2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1027
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !7

diff  --git a/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll b/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll
index 3d020ddaf3642..9a2aa299e5d16 100644
--- a/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll
+++ b/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll
@@ -5,19 +5,19 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Function Attrs: nounwind ssp uwtable
-define void @conditional_store(i32* noalias nocapture %indices) #0 !dbg !4 {
+define void @conditional_store(ptr noalias nocapture %indices) #0 !dbg !4 {
 entry:
   br label %for.body, !dbg !10
 
 for.body:                                         ; preds = %for.inc, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
-  %arrayidx = getelementptr inbounds i32, i32* %indices, i64 %indvars.iv, !dbg !12
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !12, !tbaa !14
+  %arrayidx = getelementptr inbounds i32, ptr %indices, i64 %indvars.iv, !dbg !12
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !12, !tbaa !14
   %cmp1 = icmp eq i32 %0, 1024, !dbg !12
   br i1 %cmp1, label %if.then, label %for.inc, !dbg !12
 
 if.then:                                          ; preds = %for.body
-  store i32 0, i32* %arrayidx, align 4, !dbg !18, !tbaa !14
+  store i32 0, ptr %arrayidx, align 4, !dbg !18, !tbaa !14
   br label %for.inc, !dbg !18
 
 for.inc:                                          ; preds = %for.body, %if.then

diff  --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index 9277f1c0cb383..a27f2f0841bca 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -20,7 +20,7 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Function Attrs: nounwind optsize ssp uwtable
-define i32 @_Z4testPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
+define i32 @_Z4testPii(ptr nocapture %A, i32 %Length) #0 !dbg !4 {
 entry:
   %cmp8 = icmp sgt i32 %Length, 0, !dbg !10
   br i1 %cmp8, label %for.body.preheader, label %end, !dbg !10
@@ -30,13 +30,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %if.else
   %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !12
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !12, !tbaa !15
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !12
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !12, !tbaa !15
   %cmp1 = icmp sgt i32 %0, 10, !dbg !12
   br i1 %cmp1, label %end.loopexit, label %if.else, !dbg !12
 
 if.else:                                          ; preds = %for.body
-  store i32 0, i32* %arrayidx, align 4, !dbg !19, !tbaa !15
+  store i32 0, ptr %arrayidx, align 4, !dbg !19, !tbaa !15
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
   %1 = trunc i64 %indvars.iv.next to i32, !dbg !10
   %cmp = icmp slt i32 %1, %Length, !dbg !10

diff  --git a/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll b/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll
index bf7fa7c9da175..1d81f0658e721 100644
--- a/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll
+++ b/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -9,37 +9,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK: ret i32
 define i32 @cpp_new_arrays() uwtable ssp {
 entry:
-  %call = call noalias i8* @_Znwm(i64 4)
-  %0 = bitcast i8* %call to float*
-  store float 1.000000e+03, float* %0, align 4
-  %call1 = call noalias i8* @_Znwm(i64 4)
-  %1 = bitcast i8* %call1 to float*
-  store float 1.000000e+03, float* %1, align 4
-  %call3 = call noalias i8* @_Znwm(i64 4)
-  %2 = bitcast i8* %call3 to float*
-  store float 1.000000e+03, float* %2, align 4
+  %call = call noalias ptr @_Znwm(i64 4)
+  store float 1.000000e+03, ptr %call, align 4
+  %call1 = call noalias ptr @_Znwm(i64 4)
+  store float 1.000000e+03, ptr %call1, align 4
+  %call3 = call noalias ptr @_Znwm(i64 4)
+  store float 1.000000e+03, ptr %call3, align 4
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %idxprom = sext i32 %i.01 to i64
-  %arrayidx = getelementptr inbounds float, float* %0, i64 %idxprom
-  %3 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %call, i64 %idxprom
+  %0 = load float, ptr %arrayidx, align 4
   %idxprom5 = sext i32 %i.01 to i64
-  %arrayidx6 = getelementptr inbounds float, float* %1, i64 %idxprom5
-  %4 = load float, float* %arrayidx6, align 4
-  %add = fadd float %3, %4
+  %arrayidx6 = getelementptr inbounds float, ptr %call1, i64 %idxprom5
+  %1 = load float, ptr %arrayidx6, align 4
+  %add = fadd float %0, %1
   %idxprom7 = sext i32 %i.01 to i64
-  %arrayidx8 = getelementptr inbounds float, float* %2, i64 %idxprom7
-  store float %add, float* %arrayidx8, align 4
+  %arrayidx8 = getelementptr inbounds float, ptr %call3, i64 %idxprom7
+  store float %add, ptr %arrayidx8, align 4
   %inc = add nsw i32 %i.01, 1
   %cmp = icmp slt i32 %inc, 1000
   br i1 %cmp, label %for.body, label %for.end
 
 for.end:                                          ; preds = %for.body
-  %5 = load float, float* %2, align 4
-  %conv10 = fptosi float %5 to i32
+  %2 = load float, ptr %call3, align 4
+  %conv10 = fptosi float %2 to i32
   ret i32 %conv10
 }
 
-declare noalias i8* @_Znwm(i64)
+declare noalias ptr @_Znwm(i64)

diff  --git a/llvm/test/Transforms/LoopVectorize/dbg.value.ll b/llvm/test/Transforms/LoopVectorize/dbg.value.ll
index d05b89b114ca5..16d6f8bdde306 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg.value.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg.value.ll
@@ -18,13 +18,13 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   ;CHECK: load <4 x i32>
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv, !dbg !23
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !23
-  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %indvars.iv, !dbg !23
-  %1 = load i32, i32* %arrayidx2, align 4, !dbg !23
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv, !dbg !23
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !23
+  %arrayidx2 = getelementptr inbounds [1024 x i32], ptr @C, i64 0, i64 %indvars.iv, !dbg !23
+  %1 = load i32, ptr %arrayidx2, align 4, !dbg !23
   %add = add nsw i32 %1, %0, !dbg !23
-  %arrayidx4 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !23
-  store i32 %add, i32* %arrayidx4, align 4, !dbg !23
+  %arrayidx4 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv, !dbg !23
+  store i32 %add, ptr %arrayidx4, align 4, !dbg !23
   %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !22
   tail call void @llvm.dbg.value(metadata !12, metadata !19, metadata !21), !dbg !22
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !22

diff  --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll
index a1aa330bb75a3..eeb270c4be545 100644
--- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll
@@ -13,8 +13,8 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 ; CHECK:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; CHECK:       %[[I0:.+]] = add i64 %index, 0
 ; CHECK:       %[[I2:.+]] = add i64 %index, 2
-; CHECK:       getelementptr inbounds i64, i64* %a, i64 %[[I0]]
-; CHECK:       getelementptr inbounds i64, i64* %a, i64 %[[I2]]
+; CHECK:       getelementptr inbounds i64, ptr %a, i64 %[[I0]]
+; CHECK:       getelementptr inbounds i64, ptr %a, i64 %[[I2]]
 ; CHECK-NOT:   add nuw nsw i64 %[[I0]], 1
 ; CHECK-NOT:   add nuw nsw i64 %[[I2]], 1
 ; CHECK-NOT:   icmp slt i64 {{.*}}, %n
@@ -22,15 +22,15 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 ; CHECK:       %[[CMP:.+]] = icmp eq i64 %index.next, %n.vec
 ; CHECK:       br i1 %[[CMP]], label %middle.block, label %vector.body
 ;
-define i64 @dead_instructions_01(i64 *%a, i64 %n) {
+define i64 @dead_instructions_01(ptr %a, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
   %r = phi i64 [ %tmp2, %for.body ], [ 0, %entry ]
-  %tmp0 = getelementptr inbounds i64, i64* %a, i64 %i
-  %tmp1 = load i64, i64* %tmp0, align 8
+  %tmp0 = getelementptr inbounds i64, ptr %a, i64 %i
+  %tmp1 = load i64, ptr %tmp0, align 8
   %tmp2 = add i64 %tmp1, %r
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
@@ -51,7 +51,7 @@ for.end:
 ;
 ; CHECK:     vector.body:
 ;
-define void @pr47390(i32 *%a) {
+define void @pr47390(ptr %a) {
 entry:
   br label %loop
 
@@ -64,8 +64,8 @@ loop:
   %secondary = phi i32 [ 1, %entry ], [ %secondary_add, %loop ]
   %primary_add = add i32 %primary, 1
   %secondary_add = add i32 %secondary, 1
-  %gep = getelementptr inbounds i32, i32* %a, i32 %secondary
-  %load = load i32, i32* %gep, align 8
+  %gep = getelementptr inbounds i32, ptr %a, i32 %secondary
+  %load = load i32, ptr %gep, align 8
   %cmp = icmp eq i32 %secondary, 5
   br i1 %cmp, label %exit, label %loop
 }

diff  --git a/llvm/test/Transforms/LoopVectorize/debugloc.ll b/llvm/test/Transforms/LoopVectorize/debugloc.ll
index 4c16354640461..2e6b8745ad4e6 100644
--- a/llvm/test/Transforms/LoopVectorize/debugloc.ll
+++ b/llvm/test/Transforms/LoopVectorize/debugloc.ll
@@ -9,9 +9,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK:   min.iters.check = icmp ult i64 {{.*}}, 2, !dbg !{{[0-9]+}}
 ; CHECK: vector.body
 ; CHECK:   index {{.*}}, !dbg ![[LOC1:[0-9]+]]
-; CHECK:   getelementptr inbounds i32, i32* %a, {{.*}}, !dbg ![[LOC2:[0-9]+]]
-; CHECK:   getelementptr inbounds i32, i32* %b, {{.*}}, !dbg ![[LOC1]]
-; CHECK:   load <2 x i32>, <2 x i32>* {{.*}}, !dbg ![[LOC1]]
+; CHECK:   getelementptr inbounds i32, ptr %a, {{.*}}, !dbg ![[LOC2:[0-9]+]]
+; CHECK:   getelementptr inbounds i32, ptr %b, {{.*}}, !dbg ![[LOC1]]
+; CHECK:   load <2 x i32>, ptr {{.*}}, !dbg ![[LOC1]]
 ; CHECK:   add <2 x i32> {{.*}}, !dbg ![[LOC1]]
 ; CHECK:   add nuw i64 %index, 2, !dbg ![[LOC1]]
 ; CHECK:   icmp eq i64 %index.next, %n.vec, !dbg ![[LOC1]]
@@ -20,9 +20,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK: for.body
 ; CHECK: br i1{{.*}}, label %for.body,{{.*}}, !dbg ![[BR_LOC]],
 
-define i32 @f(i32* nocapture %a, i32* %b, i32 %size) !dbg !4 {
+define i32 @f(ptr nocapture %a, ptr %b, i32 %size) !dbg !4 {
 entry:
-  call void @llvm.dbg.value(metadata i32* %a, metadata !13, metadata !DIExpression()), !dbg !19
+  call void @llvm.dbg.value(metadata ptr %a, metadata !13, metadata !DIExpression()), !dbg !19
   call void @llvm.dbg.value(metadata i32 %size, metadata !14, metadata !DIExpression()), !dbg !19
   call void @llvm.dbg.value(metadata i32 0, metadata !15, metadata !DIExpression()), !dbg !20
   call void @llvm.dbg.value(metadata i32 0, metadata !16, metadata !DIExpression()), !dbg !21
@@ -35,10 +35,10 @@ for.body.lr.ph:                                   ; preds = %entry
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %sum = phi i32 [ 0, %for.body.lr.ph ], [ %sum.next, %for.body ]
-  %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv, !dbg !19
-  %arrayidx.2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv, !dbg !22
-  %l.1 = load i32, i32* %arrayidx.1, align 4, !dbg !22
-  %l.2 = load i32, i32* %arrayidx.2, align 4, !dbg !22
+  %arrayidx.1 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv, !dbg !19
+  %arrayidx.2 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv, !dbg !22
+  %l.1 = load i32, ptr %arrayidx.1, align 4, !dbg !22
+  %l.2 = load i32, ptr %arrayidx.2, align 4, !dbg !22
   %add.1 = add i32 %l.1, %l.2
   %sum.next = add i32 %add.1, %sum, !dbg !22
   %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !22
@@ -57,10 +57,10 @@ for.end:                                          ; preds = %entry, %for.cond.fo
   ret i32 %sum.0.lcssa, !dbg !26
 }
 
-define i32 @test_debug_loc_on_branch_in_loop(i32* noalias %src, i32* noalias %dst) {
+define i32 @test_debug_loc_on_branch_in_loop(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-LABEL: define i32 @test_debug_loc_on_branch_in_loop(
 ; CHECK-LABEL: vector.body:
-; CHECK:        [[LOAD:%.+]] = load <2 x i32>, <2 x i32>* {{.+}}, align 4
+; CHECK:        [[LOAD:%.+]] = load <2 x i32>, ptr {{.+}}, align 4
 ; CHECK-NEXT:   [[CMP:%.+]] = icmp eq <2 x i32> [[LOAD]], <i32 10, i32 10>
 ; CHECK-NEXT:   [[XOR:%.+]] = xor <2 x i1> [[CMP:%.+]], <i1 true, i1 true>, !dbg [[LOC3:!.+]]
 ; CHECK-NEXT:   [[EXT:%.+]] = extractelement <2 x i1> [[XOR]], i32 0, !dbg [[LOC3]]
@@ -68,8 +68,8 @@ define i32 @test_debug_loc_on_branch_in_loop(i32* noalias %src, i32* noalias %ds
 ; CHECK-NOT:  !dbg
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT:   [[GEP:%.+]] = getelementptr inbounds i32, i32* %dst, i64 {{.+}}, !dbg [[LOC3]]
-; CHECK-NEXT:   store i32 0, i32* [[GEP]], align 4, !dbg [[LOC3]]
+; CHECK-NEXT:   [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 {{.+}}, !dbg [[LOC3]]
+; CHECK-NEXT:   store i32 0, ptr [[GEP]], align 4, !dbg [[LOC3]]
 ; CHECK-NEXT:   br label %pred.store.continue, !dbg [[LOC3]]
 ; CHECK-EMPTY:
 ;
@@ -78,14 +78,14 @@ entry:
 
 loop.header:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
-  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
-  %l = load i32, i32* %gep.src, align 4
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
   %cmp = icmp eq i32 %l, 10
   br i1 %cmp, label %loop.latch, label %if.then, !dbg !28
 
 if.then:
-  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 %iv
-  store i32 0, i32* %gep.dst, align 4
+  %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv
+  store i32 0, ptr %gep.dst, align 4
   br label %loop.latch
 
 loop.latch:
@@ -97,10 +97,10 @@ exit:
   ret i32 0
 }
 
-define i32 @test_different_debug_loc_on_replicate_recipe(i32* noalias %src, i32* noalias %dst) {
+define i32 @test_different_debug_loc_on_replicate_recipe(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-LABEL: define i32 @test_different_debug_loc_on_replicate_recipe(
 ; CHECK-LABEL: vector.body:
-; CHECK:        [[LOAD:%.+]] = load <2 x i32>, <2 x i32>* {{.+}}, align 4
+; CHECK:        [[LOAD:%.+]] = load <2 x i32>, ptr {{.+}}, align 4
 ; CHECK-NEXT:   [[CMP:%.+]] = icmp eq <2 x i32> [[LOAD]], <i32 10, i32 10>
 ; CHECK-NEXT:   [[XOR:%.+]] = xor <2 x i1> [[CMP:%.+]], <i1 true, i1 true>, !dbg [[LOC4:!.+]]
 ; CHECK-NEXT:   [[EXT:%.+]] = extractelement <2 x i1> [[XOR]], i32 0, !dbg [[LOC4]]
@@ -108,8 +108,8 @@ define i32 @test_different_debug_loc_on_replicate_recipe(i32* noalias %src, i32*
 ; CHECK-NOT:  !dbg
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT:   [[GEP:%.+]] = getelementptr inbounds i32, i32* %dst, i64 {{.+}}, !dbg [[LOC5:!.+]]
-; CHECK-NEXT:   store i32 0, i32* [[GEP]], align 4, !dbg [[LOC5]]
+; CHECK-NEXT:   [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 {{.+}}, !dbg [[LOC5:!.+]]
+; CHECK-NEXT:   store i32 0, ptr [[GEP]], align 4, !dbg [[LOC5]]
 ; CHECK-NEXT:   br label %pred.store.continue, !dbg [[LOC4]]
 ; CHECK-EMPTY:
 ;
@@ -118,14 +118,14 @@ entry:
 
 loop.header:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
-  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
-  %l = load i32, i32* %gep.src, align 4
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
   %cmp = icmp eq i32 %l, 10
   br i1 %cmp, label %loop.latch, label %if.then, !dbg !33
 
 if.then:
-  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 %iv, !dbg !34
-  store i32 0, i32* %gep.dst, align 4
+  %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv, !dbg !34
+  store i32 0, ptr %gep.dst, align 4
   br label %loop.latch
 
 loop.latch:

diff  --git a/llvm/test/Transforms/LoopVectorize/demanded-bits-of-pointer-instruction.ll b/llvm/test/Transforms/LoopVectorize/demanded-bits-of-pointer-instruction.ll
index 0a46685b04214..41756ffb64e6c 100644
--- a/llvm/test/Transforms/LoopVectorize/demanded-bits-of-pointer-instruction.ll
+++ b/llvm/test/Transforms/LoopVectorize/demanded-bits-of-pointer-instruction.ll
@@ -4,15 +4,15 @@
 ; Only make sure we do not crash.
 
 ; CHECK: @test
-define void @test(i8* %ptr, i8* %ptr_end) {
+define void @test(ptr %ptr, ptr %ptr_end) {
 start:
   br label %loop
 
 loop:
-  %ptr2 = phi i8* [ %ptr3, %loop ], [ %ptr, %start ]
+  %ptr2 = phi ptr [ %ptr3, %loop ], [ %ptr, %start ]
   %x = sext i8 undef to i64
-  %ptr3 = getelementptr inbounds i8, i8* %ptr2, i64 1
-  %cmp = icmp ult i8* %ptr3, %ptr_end
+  %ptr3 = getelementptr inbounds i8, ptr %ptr2, i64 1
+  %cmp = icmp ult ptr %ptr3, %ptr_end
   br i1 %cmp, label %loop, label %end
 
 end:

diff  --git a/llvm/test/Transforms/LoopVectorize/diag-missing-instr-debug-loc.ll b/llvm/test/Transforms/LoopVectorize/diag-missing-instr-debug-loc.ll
index ce5d8723a60b9..8ce2a979b1e46 100644
--- a/llvm/test/Transforms/LoopVectorize/diag-missing-instr-debug-loc.ll
+++ b/llvm/test/Transforms/LoopVectorize/diag-missing-instr-debug-loc.ll
@@ -40,11 +40,11 @@ for.body:                                         ; preds = %for.body.preheader,
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %a.addr.08 = phi i32 [ %0, %for.body ], [ %a, %for.body.preheader ]
 
-  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @out, i64 0, i64 %indvars.iv, !dbg !10
-  store i32 %a.addr.08, i32* %arrayidx, align 4, !dbg !12, !tbaa !13
+  %arrayidx = getelementptr inbounds [0 x i32], ptr @out, i64 0, i64 %indvars.iv, !dbg !10
+  store i32 %a.addr.08, ptr %arrayidx, align 4, !dbg !12, !tbaa !13
   %idxprom1 = sext i32 %a.addr.08 to i64, !dbg !17
-  %arrayidx2 = getelementptr inbounds [0 x i32], [0 x i32]* @map, i64 0, i64 %idxprom1, !dbg !17
-  %0 = load i32, i32* %arrayidx2, align 4, !dbg !17, !tbaa !13
+  %arrayidx2 = getelementptr inbounds [0 x i32], ptr @map, i64 0, i64 %idxprom1, !dbg !17
+  %0 = load i32, ptr %arrayidx2, align 4, !dbg !17, !tbaa !13
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !9
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !9
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !9, !llvm.loop !18

diff  --git a/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll b/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll
index 2c6c25d3e214c..ed107b10dcd98 100644
--- a/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll
@@ -3,21 +3,21 @@
 ;   1	void cold(char *A, char *B, char *C, char *D, char *E, int N) {
 ;   2	  for(int i = 0; i < N; i++) {
 ;   3	    A[i + 1] = A[i] + B[i];
-;   4	    C[i] = D[i] * E[i];
+;   4	    C[i] = Dptr E[i];
 ;   5	  }
 ;   6	}
 ;   7
 ;   8	void hot(char *A, char *B, char *C, char *D, char *E, int N) {
 ;   9	  for(int i = 0; i < N; i++) {
 ;  10	    A[i + 1] = A[i] + B[i];
-;  11	    C[i] = D[i] * E[i];
+;  11	    C[i] = Dptr E[i];
 ;  12	  }
 ;  13	}
 ;  14
 ;  15	void unknown(char *A, char *B, char *C, char *D, char *E, int N) {
 ;  16	  for(int i = 0; i < N; i++) {
 ;  17	    A[i + 1] = A[i] + B[i];
-;  18	    C[i] = D[i] * E[i];
+;  18	    C[i] = Dptr E[i];
 ;  19	  }
 ;  20	}
 
@@ -33,7 +33,7 @@ source_filename = "/tmp/s.c"
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Function Attrs: norecurse nounwind ssp uwtable
-define void @cold(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !7 !prof !56 {
+define void @cold(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture %C, ptr nocapture readonly %D, ptr nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !7 !prof !56 {
 entry:
   %cmp28 = icmp sgt i32 %N, 0, !dbg !9
   br i1 %cmp28, label %ph, label %for.cond.cleanup, !dbg !10, !prof !58
@@ -43,21 +43,21 @@ ph:
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !12
-  %0 = load i8, i8* %arrayidx, align 1, !dbg !12, !tbaa !13
-  %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !16
-  %1 = load i8, i8* %arrayidx2, align 1, !dbg !16, !tbaa !13
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %indvars.iv, !dbg !12
+  %0 = load i8, ptr %arrayidx, align 1, !dbg !12, !tbaa !13
+  %arrayidx2 = getelementptr inbounds i8, ptr %B, i64 %indvars.iv, !dbg !16
+  %1 = load i8, ptr %arrayidx2, align 1, !dbg !16, !tbaa !13
   %add = add i8 %1, %0, !dbg !17
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
-  %arrayidx7 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv.next, !dbg !18
-  store i8 %add, i8* %arrayidx7, align 1, !dbg !19, !tbaa !13
-  %arrayidx9 = getelementptr inbounds i8, i8* %D, i64 %indvars.iv, !dbg !20
-  %2 = load i8, i8* %arrayidx9, align 1, !dbg !20, !tbaa !13
-  %arrayidx12 = getelementptr inbounds i8, i8* %E, i64 %indvars.iv, !dbg !21
-  %3 = load i8, i8* %arrayidx12, align 1, !dbg !21, !tbaa !13
+  %arrayidx7 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv.next, !dbg !18
+  store i8 %add, ptr %arrayidx7, align 1, !dbg !19, !tbaa !13
+  %arrayidx9 = getelementptr inbounds i8, ptr %D, i64 %indvars.iv, !dbg !20
+  %2 = load i8, ptr %arrayidx9, align 1, !dbg !20, !tbaa !13
+  %arrayidx12 = getelementptr inbounds i8, ptr %E, i64 %indvars.iv, !dbg !21
+  %3 = load i8, ptr %arrayidx12, align 1, !dbg !21, !tbaa !13
   %mul = mul i8 %3, %2, !dbg !22
-  %arrayidx16 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !23
-  store i8 %mul, i8* %arrayidx16, align 1, !dbg !24, !tbaa !13
+  %arrayidx16 = getelementptr inbounds i8, ptr %C, i64 %indvars.iv, !dbg !23
+  store i8 %mul, ptr %arrayidx16, align 1, !dbg !24, !tbaa !13
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
   %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !10
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !10, !llvm.loop !25, !prof !59
@@ -67,7 +67,7 @@ for.cond.cleanup:
 }
 
 ; Function Attrs: norecurse nounwind ssp uwtable
-define void @hot(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !26 !prof !57 {
+define void @hot(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture %C, ptr nocapture readonly %D, ptr nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !26 !prof !57 {
 entry:
   %cmp28 = icmp sgt i32 %N, 0, !dbg !27
   br i1 %cmp28, label %ph, label %for.cond.cleanup, !dbg !28, !prof !58
@@ -77,21 +77,21 @@ ph:
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !30
-  %0 = load i8, i8* %arrayidx, align 1, !dbg !30, !tbaa !13
-  %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !31
-  %1 = load i8, i8* %arrayidx2, align 1, !dbg !31, !tbaa !13
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %indvars.iv, !dbg !30
+  %0 = load i8, ptr %arrayidx, align 1, !dbg !30, !tbaa !13
+  %arrayidx2 = getelementptr inbounds i8, ptr %B, i64 %indvars.iv, !dbg !31
+  %1 = load i8, ptr %arrayidx2, align 1, !dbg !31, !tbaa !13
   %add = add i8 %1, %0, !dbg !32
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !28
-  %arrayidx7 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv.next, !dbg !33
-  store i8 %add, i8* %arrayidx7, align 1, !dbg !34, !tbaa !13
-  %arrayidx9 = getelementptr inbounds i8, i8* %D, i64 %indvars.iv, !dbg !35
-  %2 = load i8, i8* %arrayidx9, align 1, !dbg !35, !tbaa !13
-  %arrayidx12 = getelementptr inbounds i8, i8* %E, i64 %indvars.iv, !dbg !36
-  %3 = load i8, i8* %arrayidx12, align 1, !dbg !36, !tbaa !13
+  %arrayidx7 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv.next, !dbg !33
+  store i8 %add, ptr %arrayidx7, align 1, !dbg !34, !tbaa !13
+  %arrayidx9 = getelementptr inbounds i8, ptr %D, i64 %indvars.iv, !dbg !35
+  %2 = load i8, ptr %arrayidx9, align 1, !dbg !35, !tbaa !13
+  %arrayidx12 = getelementptr inbounds i8, ptr %E, i64 %indvars.iv, !dbg !36
+  %3 = load i8, ptr %arrayidx12, align 1, !dbg !36, !tbaa !13
   %mul = mul i8 %3, %2, !dbg !37
-  %arrayidx16 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !38
-  store i8 %mul, i8* %arrayidx16, align 1, !dbg !39, !tbaa !13
+  %arrayidx16 = getelementptr inbounds i8, ptr %C, i64 %indvars.iv, !dbg !38
+  store i8 %mul, ptr %arrayidx16, align 1, !dbg !39, !tbaa !13
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !28
   %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !28
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !28, !llvm.loop !40, !prof !59
@@ -101,7 +101,7 @@ for.cond.cleanup:
 }
 
 ; Function Attrs: norecurse nounwind ssp uwtable
-define void @unknown(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !41 {
+define void @unknown(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture %C, ptr nocapture readonly %D, ptr nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !41 {
 entry:
   %cmp28 = icmp sgt i32 %N, 0, !dbg !42
   br i1 %cmp28, label %ph, label %for.cond.cleanup, !dbg !43
@@ -111,21 +111,21 @@ ph:
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !45
-  %0 = load i8, i8* %arrayidx, align 1, !dbg !45, !tbaa !13
-  %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !46
-  %1 = load i8, i8* %arrayidx2, align 1, !dbg !46, !tbaa !13
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %indvars.iv, !dbg !45
+  %0 = load i8, ptr %arrayidx, align 1, !dbg !45, !tbaa !13
+  %arrayidx2 = getelementptr inbounds i8, ptr %B, i64 %indvars.iv, !dbg !46
+  %1 = load i8, ptr %arrayidx2, align 1, !dbg !46, !tbaa !13
   %add = add i8 %1, %0, !dbg !47
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !43
-  %arrayidx7 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv.next, !dbg !48
-  store i8 %add, i8* %arrayidx7, align 1, !dbg !49, !tbaa !13
-  %arrayidx9 = getelementptr inbounds i8, i8* %D, i64 %indvars.iv, !dbg !50
-  %2 = load i8, i8* %arrayidx9, align 1, !dbg !50, !tbaa !13
-  %arrayidx12 = getelementptr inbounds i8, i8* %E, i64 %indvars.iv, !dbg !51
-  %3 = load i8, i8* %arrayidx12, align 1, !dbg !51, !tbaa !13
+  %arrayidx7 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv.next, !dbg !48
+  store i8 %add, ptr %arrayidx7, align 1, !dbg !49, !tbaa !13
+  %arrayidx9 = getelementptr inbounds i8, ptr %D, i64 %indvars.iv, !dbg !50
+  %2 = load i8, ptr %arrayidx9, align 1, !dbg !50, !tbaa !13
+  %arrayidx12 = getelementptr inbounds i8, ptr %E, i64 %indvars.iv, !dbg !51
+  %3 = load i8, ptr %arrayidx12, align 1, !dbg !51, !tbaa !13
   %mul = mul i8 %3, %2, !dbg !52
-  %arrayidx16 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !53
-  store i8 %mul, i8* %arrayidx16, align 1, !dbg !54, !tbaa !13
+  %arrayidx16 = getelementptr inbounds i8, ptr %C, i64 %indvars.iv, !dbg !53
+  store i8 %mul, ptr %arrayidx16, align 1, !dbg !54, !tbaa !13
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !43
   %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !43
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !43, !llvm.loop !55

diff  --git a/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info.ll b/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info.ll
index ab1401f0f533a..30d11a12c79c4 100644
--- a/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info.ll
+++ b/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info.ll
@@ -18,21 +18,21 @@
 ;   1	void cold(char *A, char *B, char *C, char *D, char *E, int N) {
 ;   2	  for(int i = 0; i < N; i++) {
 ;   3	    A[i + 1] = A[i] + B[i];
-;   4	    C[i] = D[i] * E[i];
+;   4	    C[i] = D[i] * E[i];
 ;   5	  }
 ;   6	}
 ;   7
 ;   8	void hot(char *A, char *B, char *C, char *D, char *E, int N) {
 ;   9	  for(int i = 0; i < N; i++) {
 ;  10	    A[i + 1] = A[i] + B[i];
-;  11	    C[i] = D[i] * E[i];
+;  11	    C[i] = D[i] * E[i];
 ;  12	  }
 ;  13	}
 ;  14
 ;  15	void unknown(char *A, char *B, char *C, char *D, char *E, int N) {
 ;  16	  for(int i = 0; i < N; i++) {
 ;  17	    A[i + 1] = A[i] + B[i];
-;  18	    C[i] = D[i] * E[i];
+;  18	    C[i] = D[i] * E[i];
 ;  19	  }
 ;  20	}
 
@@ -47,7 +47,7 @@ source_filename = "/tmp/s.c"
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Function Attrs: norecurse nounwind ssp uwtable
-define void @cold(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !7 !prof !56 {
+define void @cold(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture %C, ptr nocapture readonly %D, ptr nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !7 !prof !56 {
 entry:
   %cmp28 = icmp sgt i32 %N, 0, !dbg !9
   br i1 %cmp28, label %ph, label %for.cond.cleanup, !dbg !10, !prof !58
@@ -57,21 +57,21 @@ ph:
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !12
-  %0 = load i8, i8* %arrayidx, align 1, !dbg !12, !tbaa !13
-  %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !16
-  %1 = load i8, i8* %arrayidx2, align 1, !dbg !16, !tbaa !13
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %indvars.iv, !dbg !12
+  %0 = load i8, ptr %arrayidx, align 1, !dbg !12, !tbaa !13
+  %arrayidx2 = getelementptr inbounds i8, ptr %B, i64 %indvars.iv, !dbg !16
+  %1 = load i8, ptr %arrayidx2, align 1, !dbg !16, !tbaa !13
   %add = add i8 %1, %0, !dbg !17
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
-  %arrayidx7 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv.next, !dbg !18
-  store i8 %add, i8* %arrayidx7, align 1, !dbg !19, !tbaa !13
-  %arrayidx9 = getelementptr inbounds i8, i8* %D, i64 %indvars.iv, !dbg !20
-  %2 = load i8, i8* %arrayidx9, align 1, !dbg !20, !tbaa !13
-  %arrayidx12 = getelementptr inbounds i8, i8* %E, i64 %indvars.iv, !dbg !21
-  %3 = load i8, i8* %arrayidx12, align 1, !dbg !21, !tbaa !13
+  %arrayidx7 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv.next, !dbg !18
+  store i8 %add, ptr %arrayidx7, align 1, !dbg !19, !tbaa !13
+  %arrayidx9 = getelementptr inbounds i8, ptr %D, i64 %indvars.iv, !dbg !20
+  %2 = load i8, ptr %arrayidx9, align 1, !dbg !20, !tbaa !13
+  %arrayidx12 = getelementptr inbounds i8, ptr %E, i64 %indvars.iv, !dbg !21
+  %3 = load i8, ptr %arrayidx12, align 1, !dbg !21, !tbaa !13
   %mul = mul i8 %3, %2, !dbg !22
-  %arrayidx16 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !23
-  store i8 %mul, i8* %arrayidx16, align 1, !dbg !24, !tbaa !13
+  %arrayidx16 = getelementptr inbounds i8, ptr %C, i64 %indvars.iv, !dbg !23
+  store i8 %mul, ptr %arrayidx16, align 1, !dbg !24, !tbaa !13
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
   %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !10
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !10, !llvm.loop !25, !prof !59
@@ -81,7 +81,7 @@ for.cond.cleanup:
 }
 
 ; Function Attrs: norecurse nounwind ssp uwtable
-define void @hot(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !26 !prof !57 {
+define void @hot(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture %C, ptr nocapture readonly %D, ptr nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !26 !prof !57 {
 entry:
   %cmp28 = icmp sgt i32 %N, 0, !dbg !27
   br i1 %cmp28, label %ph, label %for.cond.cleanup, !dbg !28, !prof !58
@@ -91,21 +91,21 @@ ph:
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !30
-  %0 = load i8, i8* %arrayidx, align 1, !dbg !30, !tbaa !13
-  %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !31
-  %1 = load i8, i8* %arrayidx2, align 1, !dbg !31, !tbaa !13
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %indvars.iv, !dbg !30
+  %0 = load i8, ptr %arrayidx, align 1, !dbg !30, !tbaa !13
+  %arrayidx2 = getelementptr inbounds i8, ptr %B, i64 %indvars.iv, !dbg !31
+  %1 = load i8, ptr %arrayidx2, align 1, !dbg !31, !tbaa !13
   %add = add i8 %1, %0, !dbg !32
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !28
-  %arrayidx7 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv.next, !dbg !33
-  store i8 %add, i8* %arrayidx7, align 1, !dbg !34, !tbaa !13
-  %arrayidx9 = getelementptr inbounds i8, i8* %D, i64 %indvars.iv, !dbg !35
-  %2 = load i8, i8* %arrayidx9, align 1, !dbg !35, !tbaa !13
-  %arrayidx12 = getelementptr inbounds i8, i8* %E, i64 %indvars.iv, !dbg !36
-  %3 = load i8, i8* %arrayidx12, align 1, !dbg !36, !tbaa !13
+  %arrayidx7 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv.next, !dbg !33
+  store i8 %add, ptr %arrayidx7, align 1, !dbg !34, !tbaa !13
+  %arrayidx9 = getelementptr inbounds i8, ptr %D, i64 %indvars.iv, !dbg !35
+  %2 = load i8, ptr %arrayidx9, align 1, !dbg !35, !tbaa !13
+  %arrayidx12 = getelementptr inbounds i8, ptr %E, i64 %indvars.iv, !dbg !36
+  %3 = load i8, ptr %arrayidx12, align 1, !dbg !36, !tbaa !13
   %mul = mul i8 %3, %2, !dbg !37
-  %arrayidx16 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !38
-  store i8 %mul, i8* %arrayidx16, align 1, !dbg !39, !tbaa !13
+  %arrayidx16 = getelementptr inbounds i8, ptr %C, i64 %indvars.iv, !dbg !38
+  store i8 %mul, ptr %arrayidx16, align 1, !dbg !39, !tbaa !13
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !28
   %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !28
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !28, !llvm.loop !40, !prof !59
@@ -115,7 +115,7 @@ for.cond.cleanup:
 }
 
 ; Function Attrs: norecurse nounwind ssp uwtable
-define void @unknown(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !41 {
+define void @unknown(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture %C, ptr nocapture readonly %D, ptr nocapture readonly %E, i32 %N) local_unnamed_addr #0 !dbg !41 {
 entry:
   %cmp28 = icmp sgt i32 %N, 0, !dbg !42
   br i1 %cmp28, label %for.body, label %for.cond.cleanup, !dbg !43
@@ -125,21 +125,21 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !45
-  %0 = load i8, i8* %arrayidx, align 1, !dbg !45, !tbaa !13
-  %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !46
-  %1 = load i8, i8* %arrayidx2, align 1, !dbg !46, !tbaa !13
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %indvars.iv, !dbg !45
+  %0 = load i8, ptr %arrayidx, align 1, !dbg !45, !tbaa !13
+  %arrayidx2 = getelementptr inbounds i8, ptr %B, i64 %indvars.iv, !dbg !46
+  %1 = load i8, ptr %arrayidx2, align 1, !dbg !46, !tbaa !13
   %add = add i8 %1, %0, !dbg !47
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !43
-  %arrayidx7 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv.next, !dbg !48
-  store i8 %add, i8* %arrayidx7, align 1, !dbg !49, !tbaa !13
-  %arrayidx9 = getelementptr inbounds i8, i8* %D, i64 %indvars.iv, !dbg !50
-  %2 = load i8, i8* %arrayidx9, align 1, !dbg !50, !tbaa !13
-  %arrayidx12 = getelementptr inbounds i8, i8* %E, i64 %indvars.iv, !dbg !51
-  %3 = load i8, i8* %arrayidx12, align 1, !dbg !51, !tbaa !13
+  %arrayidx7 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv.next, !dbg !48
+  store i8 %add, ptr %arrayidx7, align 1, !dbg !49, !tbaa !13
+  %arrayidx9 = getelementptr inbounds i8, ptr %D, i64 %indvars.iv, !dbg !50
+  %2 = load i8, ptr %arrayidx9, align 1, !dbg !50, !tbaa !13
+  %arrayidx12 = getelementptr inbounds i8, ptr %E, i64 %indvars.iv, !dbg !51
+  %3 = load i8, ptr %arrayidx12, align 1, !dbg !51, !tbaa !13
   %mul = mul i8 %3, %2, !dbg !52
-  %arrayidx16 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !53
-  store i8 %mul, i8* %arrayidx16, align 1, !dbg !54, !tbaa !13
+  %arrayidx16 = getelementptr inbounds i8, ptr %C, i64 %indvars.iv, !dbg !53
+  store i8 %mul, ptr %arrayidx16, align 1, !dbg !54, !tbaa !13
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !43
   %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !43
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !43, !llvm.loop !55

diff  --git a/llvm/test/Transforms/LoopVectorize/disable_nonforced.ll b/llvm/test/Transforms/LoopVectorize/disable_nonforced.ll
index 3ac4e5293c3af..9a8f8639871ce 100644
--- a/llvm/test/Transforms/LoopVectorize/disable_nonforced.ll
+++ b/llvm/test/Transforms/LoopVectorize/disable_nonforced.ll
@@ -7,16 +7,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; CHECK-LABEL: @disable_nonforced(
 ; CHECK-NOT: x i32>
-define void @disable_nonforced(i32* nocapture %a, i32 %n) {
+define void @disable_nonforced(ptr nocapture %a, i32 %n) {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
   %0 = trunc i64 %indvars.iv to i32
-  store i32 %0, i32* %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/disable_nonforced_enable.ll b/llvm/test/Transforms/LoopVectorize/disable_nonforced_enable.ll
index f4a74f6fe3640..8c4e50fdd7a61 100644
--- a/llvm/test/Transforms/LoopVectorize/disable_nonforced_enable.ll
+++ b/llvm/test/Transforms/LoopVectorize/disable_nonforced_enable.ll
@@ -7,16 +7,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; CHECK-LABEL: @disable_nonforced_enable(
 ; CHECK: store <2 x i32>
-define void @disable_nonforced_enable(i32* nocapture %a, i32 %n) {
+define void @disable_nonforced_enable(ptr nocapture %a, i32 %n) {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
   %0 = trunc i64 %indvars.iv to i32
-  store i32 %0, i32* %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/discriminator.ll b/llvm/test/Transforms/LoopVectorize/discriminator.ll
index 59379f4cc38c2..6c7d10f563ae9 100644
--- a/llvm/test/Transforms/LoopVectorize/discriminator.ll
+++ b/llvm/test/Transforms/LoopVectorize/discriminator.ll
@@ -14,25 +14,25 @@
 ;  6     a[i] += b[i];
 ;  7 }
 
-@a = local_unnamed_addr global i32* null, align 8
-@b = local_unnamed_addr global i32* null, align 8
+@a = local_unnamed_addr global ptr null, align 8
+@b = local_unnamed_addr global ptr null, align 8
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 define void @_Z3foov() local_unnamed_addr #0 !dbg !6 {
-  %1 = load i32*, i32** @b, align 8, !dbg !8, !tbaa !9
-  %2 = load i32*, i32** @a, align 8, !dbg !13, !tbaa !9
+  %1 = load ptr, ptr @b, align 8, !dbg !8, !tbaa !9
+  %2 = load ptr, ptr @a, align 8, !dbg !13, !tbaa !9
   br label %3, !dbg !14
 
 ; <label>:3:                                      ; preds = %3, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %3 ]
-  %4 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv, !dbg !8
-  %5 = load i32, i32* %4, align 4, !dbg !8, !tbaa !15
-  %6 = getelementptr inbounds i32, i32* %2, i64 %indvars.iv, !dbg !13
-  %7 = load i32, i32* %6, align 4, !dbg !17, !tbaa !15
+  %4 = getelementptr inbounds i32, ptr %1, i64 %indvars.iv, !dbg !8
+  %5 = load i32, ptr %4, align 4, !dbg !8, !tbaa !15
+  %6 = getelementptr inbounds i32, ptr %2, i64 %indvars.iv, !dbg !13
+  %7 = load i32, ptr %6, align 4, !dbg !17, !tbaa !15
   %8 = add nsw i32 %7, %5, !dbg !17
 ;DBG_VALUE: call void @llvm.dbg.declare{{.*}}!dbg ![[DBG:[0-9]*]]
   call void @llvm.dbg.declare(metadata i32 %8, metadata !22, metadata !DIExpression()), !dbg !17
-  store i32 %8, i32* %6, align 4, !dbg !17, !tbaa !15
+  store i32 %8, ptr %6, align 4, !dbg !17, !tbaa !15
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !18
   %exitcond = icmp eq i64 %indvars.iv.next, 4096, !dbg !19
   br i1 %exitcond, label %9, label %3, !dbg !14, !llvm.loop !20

diff  --git a/llvm/test/Transforms/LoopVectorize/exact.ll b/llvm/test/Transforms/LoopVectorize/exact.ll
index 77d31aec3c2ae..0012acfca1d83 100644
--- a/llvm/test/Transforms/LoopVectorize/exact.ll
+++ b/llvm/test/Transforms/LoopVectorize/exact.ll
@@ -4,16 +4,16 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK-LABEL: @lshr_exact(
 ; CHECK: lshr exact <4 x i32>
-define void @lshr_exact(i32* %x) {
+define void @lshr_exact(ptr %x) {
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
   %conv1 = lshr exact i32 %0, 1
-  store i32 %conv1, i32* %arrayidx, align 4
+  store i32 %conv1, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 256
   br i1 %exitcond, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll b/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll
index b8a10662a4e8f..e01115888446a 100644
--- a/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll
+++ b/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll
@@ -11,7 +11,7 @@
 ; #pragma clang loop vectorize(enable)
 ;   for (i = 0; i < N; i++) {
 ;     for (j = 0; j < M; j++) {
-;       a[i*M+j] = b[i*M+j] * b[i*M+j];
+;       a[i*M+j] = b[i*M+j] * b[i*M+j];
 ;     }
 ;   }
 ; }
@@ -27,7 +27,7 @@
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-define void @vector_width(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
+define void @vector_width(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
 entry:
   %cmp32 = icmp sgt i32 %N, 0
   br i1 %cmp32, label %outer.ph, label %for.end15
@@ -50,11 +50,11 @@ inner.ph:                                   ; preds = %outer.body
 inner.body:                                 ; preds = %inner.body, %inner.ph
   %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
   %2 = add nsw i64 %indvars.iv, %1
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
-  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
+  %3 = load i32, ptr %arrayidx, align 4, !tbaa !2
   %mul8 = mul nsw i32 %3, %3
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %mul8, ptr %arrayidx12, align 4, !tbaa !2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %outer.inc, label %inner.body
@@ -75,7 +75,7 @@ for.end15:                                        ; preds = %outer.inc, %entry
 ; CHECK: LV: We can vectorize this outer loop!
 ; CHECK: LV: Using VF 1 to build VPlans.
 
-define void @case2(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
+define void @case2(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
 entry:
   %cmp32 = icmp sgt i32 %N, 0
   br i1 %cmp32, label %outer.ph, label %for.end15
@@ -98,11 +98,11 @@ inner.ph:                                  ; preds = %outer.body
 inner.body:                                        ; preds = %inner.body, %inner.ph
   %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
   %2 = add nsw i64 %indvars.iv, %1
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
-  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
+  %3 = load i32, ptr %arrayidx, align 4, !tbaa !2
   %mul8 = mul nsw i32 %3, %3
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %mul8, ptr %arrayidx12, align 4, !tbaa !2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %outer.inc, label %inner.body
@@ -125,7 +125,7 @@ for.end15:                                        ; preds = %outer.inc, %entry
 ; CHECK: LV: Loop hints: force=?
 ; CHECK: LV: Found a loop: inner.body
 
-define void @case3(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
+define void @case3(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
 entry:
   %cmp32 = icmp sgt i32 %N, 0
   br i1 %cmp32, label %outer.ph, label %for.end15
@@ -148,11 +148,11 @@ inner.ph:                                         ; preds = %outer.body
 inner.body:                                       ; preds = %inner.body, %inner.ph
   %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
   %2 = add nsw i64 %indvars.iv, %1
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
-  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
+  %3 = load i32, ptr %arrayidx, align 4, !tbaa !2
   %mul8 = mul nsw i32 %3, %3
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %mul8, ptr %arrayidx12, align 4, !tbaa !2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %outer.inc, label %inner.body
@@ -175,7 +175,7 @@ for.end15:                                        ; preds = %outer.inc, %entry
 ; CHECK: LV: Loop hints: force=?
 ; CHECK: LV: Found a loop: inner.body
 
-define void @case4(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
+define void @case4(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
 entry:
   %cmp32 = icmp sgt i32 %N, 0
   br i1 %cmp32, label %outer.ph, label %for.end15
@@ -198,11 +198,11 @@ inner.ph:                                  ; preds = %outer.body
 inner.body:                                        ; preds = %inner.body, %inner.ph
   %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
   %2 = add nsw i64 %indvars.iv, %1
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
-  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
+  %3 = load i32, ptr %arrayidx, align 4, !tbaa !2
   %mul8 = mul nsw i32 %3, %3
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %mul8, ptr %arrayidx12, align 4, !tbaa !2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %outer.inc, label %inner.body

diff  --git a/llvm/test/Transforms/LoopVectorize/explicit_outer_nonuniform_inner.ll b/llvm/test/Transforms/LoopVectorize/explicit_outer_nonuniform_inner.ll
index 7060864d19331..1253bbff85fd4 100644
--- a/llvm/test/Transforms/LoopVectorize/explicit_outer_nonuniform_inner.ll
+++ b/llvm/test/Transforms/LoopVectorize/explicit_outer_nonuniform_inner.ll
@@ -12,7 +12,7 @@
 ;   for (i = 0; i < N; i++) {
 ;     // Tested inner loop. It will be replaced per test.
 ;     for (j = 0; j < M; j++) {
-;       a[i*M+j] = b[i*M+j] * b[i*M+j];
+;       a[i*M+j] = b[i*M+j] * b[i*M+j];
 ;     }
 ;   }
 ; }
@@ -25,7 +25,7 @@
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-define void @iv_start(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
+define void @iv_start(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
 entry:
   %cmp33 = icmp sgt i32 %N, 0
   br i1 %cmp33, label %outer.ph, label %for.end15
@@ -48,11 +48,11 @@ inner.ph:                                   ; preds = %outer.body
 inner.body:                                 ; preds = %inner.body, %inner.ph
   %indvars.iv35 = phi i64 [ %indvars.iv38, %inner.ph ], [ %indvars.iv.next36, %inner.body ]
   %2 = add nsw i64 %indvars.iv35, %1
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
-  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
+  %3 = load i32, ptr %arrayidx, align 4, !tbaa !2
   %mul8 = mul nsw i32 %3, %3
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %mul8, ptr %arrayidx12, align 4, !tbaa !2
   %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
   %exitcond = icmp eq i64 %indvars.iv.next36, %wide.trip.count
   br i1 %exitcond, label %outer.inc, label %inner.body
@@ -73,7 +73,7 @@ for.end15:                                  ; preds = %outer.inc, %entry
 ; CHECK: LV: Not vectorizing: Outer loop contains divergent loops.
 ; CHECK: LV: Not vectorizing: Unsupported outer loop.
 
-define void @loop_ub(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
+define void @loop_ub(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
 entry:
   %cmp32 = icmp sgt i32 %N, 0
   br i1 %cmp32, label %outer.ph, label %for.end15
@@ -95,11 +95,11 @@ inner.ph:                                   ; preds = %outer.body
 inner.body:                                 ; preds = %inner.body, %inner.ph
   %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
   %2 = add nsw i64 %indvars.iv, %1
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
-  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
+  %3 = load i32, ptr %arrayidx, align 4, !tbaa !2
   %mul8 = mul nsw i32 %3, %3
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %mul8, ptr %arrayidx12, align 4, !tbaa !2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %indvars.iv38
   br i1 %exitcond, label %outer.inc, label %inner.body
@@ -119,7 +119,7 @@ for.end15:                                  ; preds = %outer.inc, %entry
 ; CHECK: LV: Not vectorizing: Outer loop contains divergent loops.
 ; CHECK: LV: Not vectorizing: Unsupported outer loop.
 
-define void @iv_step(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
+define void @iv_step(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
 entry:
   %cmp33 = icmp sgt i32 %N, 0
   br i1 %cmp33, label %outer.ph, label %for.end15
@@ -141,11 +141,11 @@ inner.ph:                                   ; preds = %outer.body
 inner.body:                                 ; preds = %inner.ph, %inner.body
   %indvars.iv36 = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next37, %inner.body ]
   %2 = add nsw i64 %indvars.iv36, %1
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
-  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
+  %3 = load i32, ptr %arrayidx, align 4, !tbaa !2
   %mul8 = mul nsw i32 %3, %3
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %mul8, ptr %arrayidx12, align 4, !tbaa !2
   %indvars.iv.next37 = add nuw nsw i64 %indvars.iv36, %indvars.iv39
   %cmp2 = icmp slt i64 %indvars.iv.next37, %0
   br i1 %cmp2, label %inner.body, label %for.inc14

diff  --git a/llvm/test/Transforms/LoopVectorize/explicit_outer_uniform_diverg_branch.ll b/llvm/test/Transforms/LoopVectorize/explicit_outer_uniform_diverg_branch.ll
index 259f4f41ef349..ac87c1500114b 100644
--- a/llvm/test/Transforms/LoopVectorize/explicit_outer_uniform_diverg_branch.ll
+++ b/llvm/test/Transforms/LoopVectorize/explicit_outer_uniform_diverg_branch.ll
@@ -13,7 +13,7 @@
 ;     // Tested conditional branch. COND will be replaced per test.
 ;     if (COND)
 ;       for (j = 0; j < M; j++) {
-;         a[i*M+j] = b[i*M+j] * b[i*M+j];
+;         a[i*M+j] = b[i*M+j] * b[i*M+j];
 ;       }
 ;   }
 ; }
@@ -25,7 +25,7 @@
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-define void @uniform_branch(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
+define void @uniform_branch(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
 entry:
   %cmp39 = icmp sgt i32 %N, 0
   br i1 %cmp39, label %outer.ph, label %for.end19
@@ -42,8 +42,8 @@ outer.ph:                                   ; preds = %entry
 outer.body:                                 ; preds = %outer.inc, %outer.ph
   %indvars.iv42 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next43, %outer.inc ]
   %1 = mul nsw i64 %indvars.iv42, %0
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %1
-  %2 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %1
+  %2 = load i32, ptr %arrayidx, align 4, !tbaa !2
   br i1 %brmerge, label %outer.inc, label %inner.ph ; Supported uniform branch
 
 inner.ph:                                   ; preds = %outer.body
@@ -52,11 +52,11 @@ inner.ph:                                   ; preds = %outer.body
 inner.body:                                 ; preds = %inner.ph, %inner.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %inner.body ], [ 0, %inner.ph ]
   %3 = add nsw i64 %indvars.iv, %1
-  %arrayidx7 = getelementptr inbounds i32, i32* %b, i64 %3
-  %4 = load i32, i32* %arrayidx7, align 4, !tbaa !2
+  %arrayidx7 = getelementptr inbounds i32, ptr %b, i64 %3
+  %4 = load i32, ptr %arrayidx7, align 4, !tbaa !2
   %mul12 = mul nsw i32 %4, %4
-  %arrayidx16 = getelementptr inbounds i32, i32* %a, i64 %3
-  store i32 %mul12, i32* %arrayidx16, align 4, !tbaa !2
+  %arrayidx16 = getelementptr inbounds i32, ptr %a, i64 %3
+  store i32 %mul12, ptr %arrayidx16, align 4, !tbaa !2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %M64
   br i1 %exitcond, label %outer.inc, label %inner.body
@@ -77,7 +77,7 @@ for.end19:                                  ; preds = %outer.inc, %entry
 ; CHECK: Unsupported conditional branch.
 ; CHECK: LV: Not vectorizing: Unsupported outer loop.
 
-define void @divergent_branch(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
+define void @divergent_branch(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
 entry:
   %cmp39 = icmp sgt i32 %N, 0
   br i1 %cmp39, label %outer.ph, label %for.end19
@@ -92,8 +92,8 @@ outer.ph:                                   ; preds = %entry
 outer.body:                                 ; preds = %outer.inc, %outer.ph
   %indvars.iv42 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next43, %outer.inc ]
   %1 = mul nsw i64 %indvars.iv42, %0
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %1
-  %2 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %1
+  %2 = load i32, ptr %arrayidx, align 4, !tbaa !2
   %cmp1 = icmp ne i32 %2, 0 ; Divergent condition
   %brmerge = or i1 %cmp1, %cmp337 ; Divergent condition
   br i1 %brmerge, label %outer.inc, label %inner.ph ; Unsupported divergent branch.
@@ -104,11 +104,11 @@ inner.ph:                                   ; preds = %outer.body
 inner.body:                                 ; preds = %inner.ph, %inner.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %inner.body ], [ 0, %inner.ph ]
   %3 = add nsw i64 %indvars.iv, %1
-  %arrayidx7 = getelementptr inbounds i32, i32* %b, i64 %3
-  %4 = load i32, i32* %arrayidx7, align 4, !tbaa !2
+  %arrayidx7 = getelementptr inbounds i32, ptr %b, i64 %3
+  %4 = load i32, ptr %arrayidx7, align 4, !tbaa !2
   %mul12 = mul nsw i32 %4, %4
-  %arrayidx16 = getelementptr inbounds i32, i32* %a, i64 %3
-  store i32 %mul12, i32* %arrayidx16, align 4, !tbaa !2
+  %arrayidx16 = getelementptr inbounds i32, ptr %a, i64 %3
+  store i32 %mul12, ptr %arrayidx16, align 4, !tbaa !2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %M64
   br i1 %exitcond, label %outer.inc, label %inner.body

diff  --git a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll
index cb5072c50d630..90ce05318332c 100644
--- a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll
+++ b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=loop-vectorize,dce,instcombine -S -force-vector-width=4 < %s 2>%t | FileCheck %s
 
-define void @inv_store_last_lane(i32* noalias nocapture %a, i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) {
+define void @inv_store_last_lane(ptr noalias nocapture %a, ptr noalias nocapture %inv, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @inv_store_last_lane(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
@@ -11,13 +11,11 @@ define void @inv_store_last_lane(i32* noalias nocapture %a, i32* noalias nocaptu
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -30,18 +28,18 @@ define void @inv_store_last_lane(i32* noalias nocapture %a, i32* noalias nocaptu
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP7]], 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[INV:%.*]], i64 42
-; CHECK-NEXT:    store i32 [[MUL_LCSSA]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[INV:%.*]], i64 42
+; CHECK-NEXT:    store i32 [[MUL_LCSSA]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -49,22 +47,22 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %mul = shl nsw i32 %0, 1
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 %mul, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %mul, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %exit, label %for.body
 
 exit:              ; preds = %for.body
-  %arrayidx5 = getelementptr inbounds i32, i32* %inv, i64 42
-  store i32 %mul, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %inv, i64 42
+  store i32 %mul, ptr %arrayidx5, align 4
   ret void
 }
 
-define float @ret_last_lane(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @ret_last_lane(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @ret_last_lane(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
@@ -74,13 +72,11 @@ define float @ret_last_lane(float* noalias nocapture %a, float* noalias nocaptur
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -93,11 +89,11 @@ define float @ret_last_lane(float* noalias nocapture %a, float* noalias nocaptur
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP7]], 2.000000e+00
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store float [[MUL]], float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[MUL]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -110,11 +106,11 @@ entry:
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %mul = fmul float %0, 2.000000e+00
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %mul, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %mul, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %exit, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/fix-reduction-dbg.ll b/llvm/test/Transforms/LoopVectorize/fix-reduction-dbg.ll
index 7837ca8eeb9dd..3fd1b573a1ed4 100644
--- a/llvm/test/Transforms/LoopVectorize/fix-reduction-dbg.ll
+++ b/llvm/test/Transforms/LoopVectorize/fix-reduction-dbg.ll
@@ -28,7 +28,7 @@
 ;   return ret;
 ; }
 
-define dso_local i32 @"foo"(i32 %count, i32* nocapture readonly %bar) local_unnamed_addr !dbg !8 {
+define dso_local i32 @"foo"(i32 %count, ptr nocapture readonly %bar) local_unnamed_addr !dbg !8 {
 entry:
   %cmp8 = icmp sgt i32 %count, 0, !dbg !10
   br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup, !dbg !10
@@ -48,8 +48,8 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 for.body:                                         ; preds = %for.body, %for.body.preheader
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
   %ret.09 = phi i32 [ %count, %for.body.preheader ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %bar, i64 %indvars.iv, !dbg !11
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !11, !tbaa !15
+  %arrayidx = getelementptr inbounds i32, ptr %bar, i64 %indvars.iv, !dbg !11
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !11, !tbaa !15
   %add = add nsw i32 %0, %ret.09, !dbg !12
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !10

diff  --git a/llvm/test/Transforms/LoopVectorize/flags.ll b/llvm/test/Transforms/LoopVectorize/flags.ll
index df4c0fa9ca5eb..60424490e030b 100644
--- a/llvm/test/Transforms/LoopVectorize/flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/flags.ll
@@ -7,16 +7,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK: mul nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret i32
-define i32 @flags1(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
+define i32 @flags1(i32 %n, ptr nocapture %A) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 9
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
   %4 = mul nsw i32 %3, 3
-  store i32 %4, i32* %2, align 4
+  store i32 %4, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -32,16 +32,16 @@ define i32 @flags1(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
 ;CHECK: mul <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret i32
-define i32 @flags2(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
+define i32 @flags2(i32 %n, ptr nocapture %A) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 9
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
   %4 = mul i32 %3, 3
-  store i32 %4, i32* %2, align 4
+  store i32 %4, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -57,15 +57,15 @@ define i32 @flags2(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
 ; CHECK: fadd fast <4 x float>
 ; CHECK: br
 ; CHECK: call fast float @llvm.vector.reduce.fadd.v4f32
-define float @fast_math(float* noalias %s) {
+define float @fast_math(ptr noalias %s) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %q.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %s, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %s, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %add = fadd fast float %q.04, %0
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 256

diff  --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index 75193ccfb586a..f808a9ad0b579 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -6,7 +6,7 @@
 
 @fp_inc = common global float 0.000000e+00, align 4
 
-;void fp_iv_loop1(float init, float * __restrict__ A, int N) {
+;void fp_iv_loop1(float init, float * __restrict__ A, int N) {
 ;  float x = init;
 ;  for (int i=0; i < N; ++i) {
 ;    A[i] = x;
@@ -17,13 +17,13 @@
 
 
 
-define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32 %N) {
+define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL1-LABEL: @fp_iv_loop1_fast_FMF(
 ; VEC4_INTERL1-NEXT:  entry:
 ; VEC4_INTERL1-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC4_INTERL1-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC4_INTERL1:       for.body.lr.ph:
-; VEC4_INTERL1-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
+; VEC4_INTERL1-NEXT:    [[FPINC:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -47,9 +47,8 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
 ; VEC4_INTERL1:       vector.body:
 ; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL1-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
-; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP6]], align 4
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
 ; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -64,8 +63,8 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
 ; VEC4_INTERL1:       for.body:
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC4_INTERL1-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL1-NEXT:    [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -81,7 +80,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
 ; VEC4_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC4_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC4_INTERL2:       for.body.lr.ph:
-; VEC4_INTERL2-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
+; VEC4_INTERL2-NEXT:    [[FPINC:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -106,12 +105,10 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
 ; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
-; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
-; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 4
-; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP9]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP6]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4
+; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], ptr [[TMP8]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fsub fast <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
 ; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -126,8 +123,8 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
 ; VEC4_INTERL2:       for.body:
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC4_INTERL2-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL2-NEXT:    [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -143,7 +140,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
 ; VEC1_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC1_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC1_INTERL2:       for.body.lr.ph:
-; VEC1_INTERL2-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
+; VEC1_INTERL2-NEXT:    [[FPINC:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -162,10 +159,10 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP5]]
 ; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]]
 ; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
-; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
-; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], float* [[TMP7]], align 4
-; VEC1_INTERL2-NEXT:    store float [[TMP6]], float* [[TMP8]], align 4
+; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDUCTION2]]
+; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], ptr [[TMP7]], align 4
+; VEC1_INTERL2-NEXT:    store float [[TMP6]], ptr [[TMP8]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; VEC1_INTERL2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -179,8 +176,8 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
 ; VEC1_INTERL2:       for.body:
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC1_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC1_INTERL2-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC1_INTERL2-NEXT:    [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -196,7 +193,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC2_INTERL1_PRED_STORE:       for.body.lr.ph:
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[FPINC:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -220,9 +217,8 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
 ; VEC2_INTERL1_PRED_STORE:       vector.body:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>*
-; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], <2 x float>* [[TMP7]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], ptr [[TMP6]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[DOTSPLAT5]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -233,8 +229,8 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
 ; VEC2_INTERL1_PRED_STORE:       for.body:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -248,14 +244,14 @@ entry:
   br i1 %cmp4, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:                                   ; preds = %entry
-  %fpinc = load float, float* @fp_inc, align 4
+  %fpinc = load float, ptr @fp_inc, align 4
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  store float %x.05, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  store float %x.05, ptr %arrayidx, align 4
   %add = fsub fast float %x.05, %fpinc
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -272,7 +268,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; We do not need the full 'fast' FMF to vectorize the loop, but the code can't become
 ; 'fast' spontaneously - FMF should propagate from the original IR.
 
-define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i32 %N) {
+define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 %N) {
 ;
 ;
 ;
@@ -282,7 +278,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC4_INTERL1-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC4_INTERL1-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC4_INTERL1:       for.body.lr.ph:
-; VEC4_INTERL1-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
+; VEC4_INTERL1-NEXT:    [[FPINC:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -306,9 +302,8 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC4_INTERL1:       vector.body:
 ; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL1-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
-; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP6]], align 4
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
 ; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -323,8 +318,8 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC4_INTERL1:       for.body:
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC4_INTERL1-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL1-NEXT:    [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]]
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -340,7 +335,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC4_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC4_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC4_INTERL2:       for.body.lr.ph:
-; VEC4_INTERL2-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
+; VEC4_INTERL2-NEXT:    [[FPINC:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -365,12 +360,10 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
-; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
-; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 4
-; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP9]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP6]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4
+; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], ptr [[TMP8]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
 ; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -385,8 +378,8 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC4_INTERL2:       for.body:
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC4_INTERL2-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL2-NEXT:    [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]]
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -402,7 +395,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC1_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC1_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC1_INTERL2:       for.body.lr.ph:
-; VEC1_INTERL2-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
+; VEC1_INTERL2-NEXT:    [[FPINC:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -423,10 +416,10 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[TMP6]]
 ; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[FPINC]]
 ; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
-; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC1_INTERL2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
-; VEC1_INTERL2-NEXT:    store float [[TMP7]], float* [[TMP9]], align 4
-; VEC1_INTERL2-NEXT:    store float [[TMP8]], float* [[TMP10]], align 4
+; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC1_INTERL2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDUCTION2]]
+; VEC1_INTERL2-NEXT:    store float [[TMP7]], ptr [[TMP9]], align 4
+; VEC1_INTERL2-NEXT:    store float [[TMP8]], ptr [[TMP10]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; VEC1_INTERL2-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -440,8 +433,8 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC1_INTERL2:       for.body:
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC1_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC1_INTERL2-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC1_INTERL2-NEXT:    [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]]
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -457,7 +450,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC2_INTERL1_PRED_STORE:       for.body.lr.ph:
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[FPINC:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -481,9 +474,8 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC2_INTERL1_PRED_STORE:       vector.body:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>*
-; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], <2 x float>* [[TMP7]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], ptr [[TMP6]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fsub reassoc <2 x float> [[VEC_IND]], [[DOTSPLAT5]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -494,8 +486,8 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
 ; VEC2_INTERL1_PRED_STORE:       for.body:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -509,14 +501,14 @@ entry:
   br i1 %cmp4, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:                                   ; preds = %entry
-  %fpinc = load float, float* @fp_inc, align 4
+  %fpinc = load float, ptr @fp_inc, align 4
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  store float %x.05, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  store float %x.05, ptr %arrayidx, align 4
   %add = fsub reassoc float %x.05, %fpinc
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -530,7 +522,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
   ret void
 }
 
-;void fp_iv_loop2(float init, float * __restrict__ A, int N) {
+;void fp_iv_loop2(float init, ptr __restrict__ A, int N) {
 ;  float x = init;
 ;  for (int i=0; i < N; ++i) {
 ;    A[i] = x;
@@ -539,7 +531,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ;}
 
 
-define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
+define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL1-LABEL: @fp_iv_loop2(
 ; VEC4_INTERL1-NEXT:  entry:
 ; VEC4_INTERL1-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -562,9 +554,8 @@ define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL1:       vector.body:
 ; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
-; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -579,8 +570,8 @@ define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL1:       for.body:
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL1-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC4_INTERL1-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL1-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -614,12 +605,10 @@ define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4
-; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 4
-; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP7]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4
+; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], ptr [[TMP6]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
 ; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -634,8 +623,8 @@ define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL2:       for.body:
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC4_INTERL2-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -669,10 +658,10 @@ define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fadd fast float [[TMP5]], [[INIT]]
 ; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fadd fast float [[OFFSET_IDX]], 5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
-; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
-; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], float* [[TMP7]], align 4
-; VEC1_INTERL2-NEXT:    store float [[TMP6]], float* [[TMP8]], align 4
+; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDUCTION2]]
+; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], ptr [[TMP7]], align 4
+; VEC1_INTERL2-NEXT:    store float [[TMP6]], ptr [[TMP8]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; VEC1_INTERL2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -686,8 +675,8 @@ define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
 ; VEC1_INTERL2:       for.body:
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC1_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC1_INTERL2-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC1_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -720,9 +709,8 @@ define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
 ; VEC2_INTERL1_PRED_STORE:       vector.body:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <2 x float>*
-; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], <2 x float>* [[TMP5]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], ptr [[TMP4]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float 1.000000e+00, float 1.000000e+00>
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -733,8 +721,8 @@ define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
 ; VEC2_INTERL1_PRED_STORE:       for.body:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -753,8 +741,8 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %x.06 = phi float [ %conv1, %for.body ], [ %init, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  store float %x.06, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  store float %x.06, ptr %arrayidx, align 4
   %conv1 = fadd fast float %x.06, 5.000000e-01
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -768,7 +756,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
   ret void
 }
 
-;void fp_iv_loop3(float init, float * __restrict__ A, float * __restrict__ B, float * __restrict__ C, int N) {
+;void fp_iv_loop3(float init, ptr __restrict__ A, ptr __restrict__ B, ptr __restrict__ C, int N) {
 ;  int i = 0;
 ;  float x = init;
 ;  float y = 0.1;
@@ -782,13 +770,13 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ;}
 
 
-define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalias nocapture %B, float* noalias nocapture %C, i32 %N) {
+define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias nocapture %B, ptr noalias nocapture %C, i32 %N) {
 ; VEC4_INTERL1-LABEL: @fp_iv_loop3(
 ; VEC4_INTERL1-NEXT:  entry:
 ; VEC4_INTERL1-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC4_INTERL1-NEXT:    br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC4_INTERL1:       for.body.lr.ph:
-; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = load float, float* @fp_inc, align 4
+; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
 ; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
 ; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
@@ -818,18 +806,15 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
 ; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000, float 0xBFECCCCCC0000000, float 0xBFF6666660000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[VEC_IND9:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ]
-; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL1-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
-; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND9]], <4 x float>* [[TMP9]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND9]], ptr [[TMP8]], align 4
 ; VEC4_INTERL1-NEXT:    [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
 ; VEC4_INTERL1-NEXT:    [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
 ; VEC4_INTERL1-NEXT:    [[TMP12:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP10]]
-; VEC4_INTERL1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; VEC4_INTERL1-NEXT:    [[TMP14:%.*]] = bitcast float* [[TMP13]] to <4 x float>*
-; VEC4_INTERL1-NEXT:    store <4 x float> [[TMP12]], <4 x float>* [[TMP14]], align 4
-; VEC4_INTERL1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]]
-; VEC4_INTERL1-NEXT:    [[TMP16:%.*]] = bitcast float* [[TMP15]] to <4 x float>*
-; VEC4_INTERL1-NEXT:    store <4 x float> [[TMP11]], <4 x float>* [[TMP16]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; VEC4_INTERL1-NEXT:    store <4 x float> [[TMP12]], ptr [[TMP13]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
+; VEC4_INTERL1-NEXT:    store <4 x float> [[TMP11]], ptr [[TMP15]], align 4
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
 ; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]]
@@ -847,15 +832,15 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC4_INTERL1-NEXT:    store float [[X_011]], float* [[ARRAYIDX]], align 4
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    store float [[X_011]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL1-NEXT:    [[ADD]] = fadd fast float [[X_011]], [[TMP0]]
 ; VEC4_INTERL1-NEXT:    [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01
 ; VEC4_INTERL1-NEXT:    [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
-; VEC4_INTERL1-NEXT:    store float [[ADD2]], float* [[ARRAYIDX4]], align 4
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]]
-; VEC4_INTERL1-NEXT:    store float [[CONV1]], float* [[ARRAYIDX6]], align 4
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    store float [[ADD2]], ptr [[ARRAYIDX4]], align 4
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    store float [[CONV1]], ptr [[ARRAYIDX6]], align 4
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL1-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
@@ -870,7 +855,7 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
 ; VEC4_INTERL2-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC4_INTERL2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC4_INTERL2:       for.body.lr.ph:
-; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = load float, float* @fp_inc, align 4
+; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
 ; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
 ; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
@@ -903,30 +888,24 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
 ; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000, float 0xBFECCCCCC0000000, float 0xBFF6666660000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[VEC_IND10:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT13:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[STEP_ADD11:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[DOTSPLAT9]]
-; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND10]], <4 x float>* [[TMP9]], align 4
-; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 4
-; VEC4_INTERL2-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD11]], <4 x float>* [[TMP11]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND10]], ptr [[TMP8]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 4
+; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD11]], ptr [[TMP10]], align 4
 ; VEC4_INTERL2-NEXT:    [[TMP12:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[BROADCAST_SPLAT]]
 ; VEC4_INTERL2-NEXT:    [[TMP13:%.*]] = fadd fast <4 x float> [[STEP_ADD11]], [[BROADCAST_SPLAT15]]
 ; VEC4_INTERL2-NEXT:    [[TMP14:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
 ; VEC4_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -2.500000e+00, float -2.500000e+00, float -2.500000e+00, float -2.500000e+00>
 ; VEC4_INTERL2-NEXT:    [[TMP16:%.*]] = fadd fast <4 x float> [[TMP14]], [[TMP12]]
 ; VEC4_INTERL2-NEXT:    [[TMP17:%.*]] = fadd fast <4 x float> [[TMP15]], [[TMP13]]
-; VEC4_INTERL2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP19:%.*]] = bitcast float* [[TMP18]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP16]], <4 x float>* [[TMP19]], align 4
-; VEC4_INTERL2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 4
-; VEC4_INTERL2-NEXT:    [[TMP21:%.*]] = bitcast float* [[TMP20]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP17]], <4 x float>* [[TMP21]], align 4
-; VEC4_INTERL2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP23:%.*]] = bitcast float* [[TMP22]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP14]], <4 x float>* [[TMP23]], align 4
-; VEC4_INTERL2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 4
-; VEC4_INTERL2-NEXT:    [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP15]], <4 x float>* [[TMP25]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP16]], ptr [[TMP18]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 4
+; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP17]], ptr [[TMP20]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
+; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP14]], ptr [[TMP22]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 4
+; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP15]], ptr [[TMP24]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float -4.000000e+00, float -4.000000e+00, float -4.000000e+00, float -4.000000e+00>
 ; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT13]] = fadd fast <4 x float> [[STEP_ADD11]], [[DOTSPLAT9]]
@@ -944,15 +923,15 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC4_INTERL2-NEXT:    store float [[X_011]], float* [[ARRAYIDX]], align 4
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    store float [[X_011]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL2-NEXT:    [[ADD]] = fadd fast float [[X_011]], [[TMP0]]
 ; VEC4_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01
 ; VEC4_INTERL2-NEXT:    [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
-; VEC4_INTERL2-NEXT:    store float [[ADD2]], float* [[ARRAYIDX4]], align 4
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]]
-; VEC4_INTERL2-NEXT:    store float [[CONV1]], float* [[ARRAYIDX6]], align 4
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    store float [[ADD2]], ptr [[ARRAYIDX4]], align 4
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    store float [[CONV1]], ptr [[ARRAYIDX6]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
@@ -967,7 +946,7 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
 ; VEC1_INTERL2-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC1_INTERL2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC1_INTERL2:       for.body.lr.ph:
-; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = load float, float* @fp_inc, align 4
+; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
 ; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
 ; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
@@ -991,24 +970,24 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
 ; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = sitofp i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP10:%.*]] = fmul fast float [[TMP9]], -5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[INDUCTION6:%.*]] = or i64 [[INDEX]], 1
-; VEC1_INTERL2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC1_INTERL2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION6]]
-; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], float* [[TMP11]], align 4
-; VEC1_INTERL2-NEXT:    store float [[TMP8]], float* [[TMP12]], align 4
+; VEC1_INTERL2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC1_INTERL2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDUCTION6]]
+; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], ptr [[TMP11]], align 4
+; VEC1_INTERL2-NEXT:    store float [[TMP8]], ptr [[TMP12]], align 4
 ; VEC1_INTERL2-NEXT:    [[TMP13:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP0]]
 ; VEC1_INTERL2-NEXT:    [[TMP14:%.*]] = fadd fast float [[TMP8]], [[TMP0]]
 ; VEC1_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP10]], 0xBFD99999A0000000
 ; VEC1_INTERL2-NEXT:    [[TMP16:%.*]] = fadd fast float [[TMP10]], 0xBFECCCCCC0000000
 ; VEC1_INTERL2-NEXT:    [[TMP17:%.*]] = fadd fast float [[TMP15]], [[TMP13]]
 ; VEC1_INTERL2-NEXT:    [[TMP18:%.*]] = fadd fast float [[TMP16]], [[TMP14]]
-; VEC1_INTERL2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; VEC1_INTERL2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDUCTION6]]
-; VEC1_INTERL2-NEXT:    store float [[TMP17]], float* [[TMP19]], align 4
-; VEC1_INTERL2-NEXT:    store float [[TMP18]], float* [[TMP20]], align 4
-; VEC1_INTERL2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]]
-; VEC1_INTERL2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDUCTION6]]
-; VEC1_INTERL2-NEXT:    store float [[TMP15]], float* [[TMP21]], align 4
-; VEC1_INTERL2-NEXT:    store float [[TMP16]], float* [[TMP22]], align 4
+; VEC1_INTERL2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; VEC1_INTERL2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDUCTION6]]
+; VEC1_INTERL2-NEXT:    store float [[TMP17]], ptr [[TMP19]], align 4
+; VEC1_INTERL2-NEXT:    store float [[TMP18]], ptr [[TMP20]], align 4
+; VEC1_INTERL2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
+; VEC1_INTERL2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDUCTION6]]
+; VEC1_INTERL2-NEXT:    store float [[TMP15]], ptr [[TMP21]], align 4
+; VEC1_INTERL2-NEXT:    store float [[TMP16]], ptr [[TMP22]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; VEC1_INTERL2-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -1024,15 +1003,15 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC1_INTERL2-NEXT:    [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ]
 ; VEC1_INTERL2-NEXT:    [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC1_INTERL2-NEXT:    store float [[X_011]], float* [[ARRAYIDX]], align 4
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    store float [[X_011]], ptr [[ARRAYIDX]], align 4
 ; VEC1_INTERL2-NEXT:    [[ADD]] = fadd fast float [[X_011]], [[TMP0]]
 ; VEC1_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
-; VEC1_INTERL2-NEXT:    store float [[ADD2]], float* [[ARRAYIDX4]], align 4
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]]
-; VEC1_INTERL2-NEXT:    store float [[CONV1]], float* [[ARRAYIDX6]], align 4
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    store float [[ADD2]], ptr [[ARRAYIDX4]], align 4
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    store float [[CONV1]], ptr [[ARRAYIDX6]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC1_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
@@ -1047,7 +1026,7 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; VEC2_INTERL1_PRED_STORE:       for.body.lr.ph:
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = load float, float* @fp_inc, align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = load float, ptr @fp_inc, align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
@@ -1077,18 +1056,15 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND:%.*]] = phi <2 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND9:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <2 x float>*
-; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND9]], <2 x float>* [[TMP9]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND9]], ptr [[TMP8]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP10:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP11:%.*]] = fadd fast <2 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01>
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP12:%.*]] = fadd fast <2 x float> [[TMP11]], [[TMP10]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP14:%.*]] = bitcast float* [[TMP13]] to <2 x float>*
-; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[TMP12]], <2 x float>* [[TMP14]], align 4
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP16:%.*]] = bitcast float* [[TMP15]] to <2 x float>*
-; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[TMP11]], <2 x float>* [[TMP16]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[TMP12]], ptr [[TMP13]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[TMP11]], ptr [[TMP15]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float -1.000000e+00, float -1.000000e+00>
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]]
@@ -1101,15 +1077,15 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_011]], float* [[ARRAYIDX]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_011]], ptr [[ARRAYIDX]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[ADD]] = fadd fast float [[X_011]], [[TMP0]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[ADD2]], float* [[ARRAYIDX4]], align 4
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[CONV1]], float* [[ARRAYIDX6]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[ADD2]], ptr [[ARRAYIDX4]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[CONV1]], ptr [[ARRAYIDX6]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
@@ -1122,22 +1098,22 @@ entry:
   br i1 %cmp9, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:                                   ; preds = %entry
-  %0 = load float, float* @fp_inc, align 4
+  %0 = load float, ptr @fp_inc, align 4
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %y.012 = phi float [ 0x3FB99999A0000000, %for.body.lr.ph ], [ %conv1, %for.body ]
   %x.011 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  store float %x.011, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  store float %x.011, ptr %arrayidx, align 4
   %add = fadd fast float %x.011, %0
   %conv1 = fadd fast float %y.012, -5.000000e-01
   %add2 = fadd fast float %conv1, %add
-  %arrayidx4 = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  store float %add2, float* %arrayidx4, align 4
-  %arrayidx6 = getelementptr inbounds float, float* %C, i64 %indvars.iv
-  store float %conv1, float* %arrayidx6, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  store float %add2, ptr %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds float, ptr %C, i64 %indvars.iv
+  store float %conv1, ptr %arrayidx6, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %N
@@ -1151,7 +1127,7 @@ for.end:
 }
 
 ; Start and step values are constants. There is no 'fmul' operation in this case
-;void fp_iv_loop4(float * __restrict__ A, int N) {
+;void fp_iv_loop4(ptr __restrict__ A, int N) {
 ;  float x = 1.0;
 ;  for (int i=0; i < N; ++i) {
 ;    A[i] = x;
@@ -1160,7 +1136,7 @@ for.end:
 ;}
 
 
-define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
+define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL1-LABEL: @fp_iv_loop4(
 ; VEC4_INTERL1-NEXT:  entry:
 ; VEC4_INTERL1-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -1180,9 +1156,8 @@ define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL1:       vector.body:
 ; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
-; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1197,8 +1172,8 @@ define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL1:       for.body:
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL1-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC4_INTERL1-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL1-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -1229,12 +1204,10 @@ define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4
-; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 4
-; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP7]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4
+; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], ptr [[TMP6]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
 ; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1249,8 +1222,8 @@ define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL2:       for.body:
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC4_INTERL2-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -1284,10 +1257,10 @@ define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fadd fast float [[TMP5]], 1.000000e+00
 ; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fadd fast float [[TMP5]], 1.500000e+00
 ; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
-; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
-; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], float* [[TMP7]], align 4
-; VEC1_INTERL2-NEXT:    store float [[TMP6]], float* [[TMP8]], align 4
+; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDUCTION2]]
+; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], ptr [[TMP7]], align 4
+; VEC1_INTERL2-NEXT:    store float [[TMP6]], ptr [[TMP8]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; VEC1_INTERL2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
@@ -1301,8 +1274,8 @@ define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
 ; VEC1_INTERL2:       for.body:
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC1_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC1_INTERL2-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC1_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -1332,9 +1305,8 @@ define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
 ; VEC2_INTERL1_PRED_STORE:       vector.body:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND:%.*]] = phi <2 x float> [ <float 1.000000e+00, float 1.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <2 x float>*
-; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], <2 x float>* [[TMP5]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], ptr [[TMP4]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float 1.000000e+00, float 1.000000e+00>
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1345,8 +1317,8 @@ define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
 ; VEC2_INTERL1_PRED_STORE:       for.body:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -1365,8 +1337,8 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  store float %x.06, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  store float %x.06, ptr %arrayidx, align 4
   %conv1 = fadd fast float %x.06, 5.000000e-01
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -1381,7 +1353,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 }
 
 
-define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
+define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL1-LABEL: @non_primary_iv_float_scalar(
 ; VEC4_INTERL1-NEXT:  entry:
 ; VEC4_INTERL1-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
@@ -1394,15 +1366,14 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL1:       vector.body:
 ; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
 ; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
-; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <4 x float>*
-; VEC4_INTERL1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
 ; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
 ; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
 ; VEC4_INTERL1-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC4_INTERL1:       pred.store.if:
-; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
-; VEC4_INTERL1-NEXT:    store float [[TMP0]], float* [[TMP5]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; VEC4_INTERL1-NEXT:    store float [[TMP0]], ptr [[TMP5]], align 4
 ; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VEC4_INTERL1:       pred.store.continue:
 ; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
@@ -1410,8 +1381,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL1:       pred.store.if3:
 ; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 1
 ; VEC4_INTERL1-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
-; VEC4_INTERL1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
-; VEC4_INTERL1-NEXT:    store float [[TMP7]], float* [[TMP9]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]]
+; VEC4_INTERL1-NEXT:    store float [[TMP7]], ptr [[TMP9]], align 4
 ; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; VEC4_INTERL1:       pred.store.continue4:
 ; VEC4_INTERL1-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
@@ -1419,8 +1390,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL1:       pred.store.if5:
 ; VEC4_INTERL1-NEXT:    [[TMP12:%.*]] = or i64 [[INDEX]], 2
 ; VEC4_INTERL1-NEXT:    [[TMP11:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
-; VEC4_INTERL1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
-; VEC4_INTERL1-NEXT:    store float [[TMP11]], float* [[TMP13]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]]
+; VEC4_INTERL1-NEXT:    store float [[TMP11]], ptr [[TMP13]], align 4
 ; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE5]]
 ; VEC4_INTERL1:       pred.store.continue6:
 ; VEC4_INTERL1-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
@@ -1428,8 +1399,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL1:       pred.store.if7:
 ; VEC4_INTERL1-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 3
 ; VEC4_INTERL1-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
-; VEC4_INTERL1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
-; VEC4_INTERL1-NEXT:    store float [[TMP15]], float* [[TMP17]], align 4
+; VEC4_INTERL1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]]
+; VEC4_INTERL1-NEXT:    store float [[TMP15]], ptr [[TMP17]], align 4
 ; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE7]]
 ; VEC4_INTERL1:       pred.store.continue8:
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -1445,12 +1416,12 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL1:       for.body:
 ; VEC4_INTERL1-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL1-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL1-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
-; VEC4_INTERL1-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
+; VEC4_INTERL1-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]]
+; VEC4_INTERL1-NEXT:    [[VAR1:%.*]] = load float, ptr [[VAR0]], align 4
 ; VEC4_INTERL1-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
 ; VEC4_INTERL1-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
 ; VEC4_INTERL1:       if.pred:
-; VEC4_INTERL1-NEXT:    store float [[J]], float* [[VAR0]], align 4
+; VEC4_INTERL1-NEXT:    store float [[J]], ptr [[VAR0]], align 4
 ; VEC4_INTERL1-NEXT:    br label [[FOR_INC]]
 ; VEC4_INTERL1:       for.inc:
 ; VEC4_INTERL1-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -1473,19 +1444,17 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ]
 ; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
 ; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 4
-; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
-; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 4
-; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
-; VEC4_INTERL2-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC4_INTERL2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
+; VEC4_INTERL2-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
 ; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
 ; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD2]], zeroinitializer
 ; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP6]], i64 0
 ; VEC4_INTERL2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC4_INTERL2:       pred.store.if:
-; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    store float [[TMP0]], float* [[TMP9]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; VEC4_INTERL2-NEXT:    store float [[TMP0]], ptr [[TMP9]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VEC4_INTERL2:       pred.store.continue:
 ; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1
@@ -1493,8 +1462,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL2:       pred.store.if4:
 ; VEC4_INTERL2-NEXT:    [[TMP12:%.*]] = or i64 [[INDEX]], 1
 ; VEC4_INTERL2-NEXT:    [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
-; VEC4_INTERL2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
-; VEC4_INTERL2-NEXT:    store float [[TMP11]], float* [[TMP13]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]]
+; VEC4_INTERL2-NEXT:    store float [[TMP11]], ptr [[TMP13]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; VEC4_INTERL2:       pred.store.continue5:
 ; VEC4_INTERL2-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2
@@ -1502,8 +1471,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL2:       pred.store.if6:
 ; VEC4_INTERL2-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 2
 ; VEC4_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
-; VEC4_INTERL2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
-; VEC4_INTERL2-NEXT:    store float [[TMP15]], float* [[TMP17]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]]
+; VEC4_INTERL2-NEXT:    store float [[TMP15]], ptr [[TMP17]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; VEC4_INTERL2:       pred.store.continue7:
 ; VEC4_INTERL2-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3
@@ -1511,16 +1480,16 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL2:       pred.store.if8:
 ; VEC4_INTERL2-NEXT:    [[TMP20:%.*]] = or i64 [[INDEX]], 3
 ; VEC4_INTERL2-NEXT:    [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
-; VEC4_INTERL2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]]
-; VEC4_INTERL2-NEXT:    store float [[TMP19]], float* [[TMP21]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP20]]
+; VEC4_INTERL2-NEXT:    store float [[TMP19]], ptr [[TMP21]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE8]]
 ; VEC4_INTERL2:       pred.store.continue9:
 ; VEC4_INTERL2-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP7]], i64 0
 ; VEC4_INTERL2-NEXT:    br i1 [[TMP22]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
 ; VEC4_INTERL2:       pred.store.if10:
 ; VEC4_INTERL2-NEXT:    [[TMP23:%.*]] = fadd fast float [[TMP0]], 4.000000e+00
-; VEC4_INTERL2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]]
-; VEC4_INTERL2-NEXT:    store float [[TMP23]], float* [[TMP24]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; VEC4_INTERL2-NEXT:    store float [[TMP23]], ptr [[TMP24]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE10]]
 ; VEC4_INTERL2:       pred.store.continue11:
 ; VEC4_INTERL2-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1
@@ -1528,8 +1497,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL2:       pred.store.if12:
 ; VEC4_INTERL2-NEXT:    [[TMP27:%.*]] = or i64 [[INDEX]], 5
 ; VEC4_INTERL2-NEXT:    [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00
-; VEC4_INTERL2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]]
-; VEC4_INTERL2-NEXT:    store float [[TMP26]], float* [[TMP28]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP27]]
+; VEC4_INTERL2-NEXT:    store float [[TMP26]], ptr [[TMP28]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE12]]
 ; VEC4_INTERL2:       pred.store.continue13:
 ; VEC4_INTERL2-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2
@@ -1537,8 +1506,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL2:       pred.store.if14:
 ; VEC4_INTERL2-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 6
 ; VEC4_INTERL2-NEXT:    [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00
-; VEC4_INTERL2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]]
-; VEC4_INTERL2-NEXT:    store float [[TMP30]], float* [[TMP32]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
+; VEC4_INTERL2-NEXT:    store float [[TMP30]], ptr [[TMP32]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE14]]
 ; VEC4_INTERL2:       pred.store.continue15:
 ; VEC4_INTERL2-NEXT:    [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3
@@ -1546,8 +1515,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL2:       pred.store.if16:
 ; VEC4_INTERL2-NEXT:    [[TMP35:%.*]] = or i64 [[INDEX]], 7
 ; VEC4_INTERL2-NEXT:    [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00
-; VEC4_INTERL2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP35]]
-; VEC4_INTERL2-NEXT:    store float [[TMP34]], float* [[TMP36]], align 4
+; VEC4_INTERL2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP35]]
+; VEC4_INTERL2-NEXT:    store float [[TMP34]], ptr [[TMP36]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE16]]
 ; VEC4_INTERL2:       pred.store.continue17:
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -1563,12 +1532,12 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC4_INTERL2:       for.body:
 ; VEC4_INTERL2-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL2-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
-; VEC4_INTERL2-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
+; VEC4_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]]
+; VEC4_INTERL2-NEXT:    [[VAR1:%.*]] = load float, ptr [[VAR0]], align 4
 ; VEC4_INTERL2-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
 ; VEC4_INTERL2-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
 ; VEC4_INTERL2:       if.pred:
-; VEC4_INTERL2-NEXT:    store float [[J]], float* [[VAR0]], align 4
+; VEC4_INTERL2-NEXT:    store float [[J]], ptr [[VAR0]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[FOR_INC]]
 ; VEC4_INTERL2:       for.inc:
 ; VEC4_INTERL2-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -1591,21 +1560,21 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
 ; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
-; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
-; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP1]], align 4
-; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = load float, float* [[TMP2]], align 4
+; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDUCTION2]]
+; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4
+; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4
 ; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fcmp fast oeq float [[TMP3]], 0.000000e+00
 ; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fcmp fast oeq float [[TMP4]], 0.000000e+00
 ; VEC1_INTERL2-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC1_INTERL2:       pred.store.if:
-; VEC1_INTERL2-NEXT:    store float [[TMP0]], float* [[TMP1]], align 4
+; VEC1_INTERL2-NEXT:    store float [[TMP0]], ptr [[TMP1]], align 4
 ; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VEC1_INTERL2:       pred.store.continue:
 ; VEC1_INTERL2-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
 ; VEC1_INTERL2:       pred.store.if3:
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
-; VEC1_INTERL2-NEXT:    store float [[TMP7]], float* [[TMP2]], align 4
+; VEC1_INTERL2-NEXT:    store float [[TMP7]], ptr [[TMP2]], align 4
 ; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; VEC1_INTERL2:       pred.store.continue4:
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -1621,12 +1590,12 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC1_INTERL2:       for.body:
 ; VEC1_INTERL2-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC1_INTERL2-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC1_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
-; VEC1_INTERL2-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
+; VEC1_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]]
+; VEC1_INTERL2-NEXT:    [[VAR1:%.*]] = load float, ptr [[VAR0]], align 4
 ; VEC1_INTERL2-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
 ; VEC1_INTERL2-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
 ; VEC1_INTERL2:       if.pred:
-; VEC1_INTERL2-NEXT:    store float [[J]], float* [[VAR0]], align 4
+; VEC1_INTERL2-NEXT:    store float [[J]], ptr [[VAR0]], align 4
 ; VEC1_INTERL2-NEXT:    br label [[FOR_INC]]
 ; VEC1_INTERL2:       for.inc:
 ; VEC1_INTERL2-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -1648,15 +1617,14 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC2_INTERL1_PRED_STORE:       vector.body:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <2 x float>*
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = fcmp fast oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i64 0
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC2_INTERL1_PRED_STORE:       pred.store.if:
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[TMP0]], float* [[TMP5]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[TMP0]], ptr [[TMP5]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VEC2_INTERL1_PRED_STORE:       pred.store.continue:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1
@@ -1664,8 +1632,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC2_INTERL1_PRED_STORE:       pred.store.if3:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[TMP7]], float* [[TMP9]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[TMP7]], ptr [[TMP9]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; VEC2_INTERL1_PRED_STORE:       pred.store.continue4:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -1677,12 +1645,12 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC2_INTERL1_PRED_STORE:       for.body:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[CAST_VTC]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR1:%.*]] = load float, ptr [[VAR0]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
 ; VEC2_INTERL1_PRED_STORE:       if.pred:
-; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[J]], float* [[VAR0]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[J]], ptr [[VAR0]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[FOR_INC]]
 ; VEC2_INTERL1_PRED_STORE:       for.inc:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -1698,13 +1666,13 @@ entry:
 for.body:
   %i = phi i64 [ %i.next, %for.inc ], [ 0, %entry ]
   %j = phi float [ %j.next, %for.inc ], [ 0.0, %entry ]
-  %var0 = getelementptr inbounds float, float* %A, i64 %i
-  %var1 = load float, float* %var0, align 4
+  %var0 = getelementptr inbounds float, ptr %A, i64 %i
+  %var1 = load float, ptr %var0, align 4
   %var2 = fcmp fast oeq float %var1, 0.0
   br i1 %var2, label %if.pred, label %for.inc
 
 if.pred:
-  store float %j, float* %var0, align 4
+  store float %j, ptr %var0, align 4
   br label %for.inc
 
 for.inc:

diff --git a/llvm/test/Transforms/LoopVectorize/float-reduction.ll b/llvm/test/Transforms/LoopVectorize/float-reduction.ll
index 28dc2096a6f56..ea8c0a03c9548 100644
--- a/llvm/test/Transforms/LoopVectorize/float-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-reduction.ll
@@ -4,15 +4,15 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK-LABEL: @foo(
 ;CHECK: fadd fast <4 x float>
 ;CHECK: ret
-define float @foo(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+define float @foo(ptr nocapture %A, ptr nocapture %n) nounwind uwtable readonly ssp {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %sum.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %add = fadd fast float %sum.04, %0
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -26,15 +26,15 @@ for.end:                                          ; preds = %for.body
 ;CHECK-LABEL: @foosub(
 ;CHECK: fsub fast <4 x float>
 ;CHECK: ret
-define float @foosub(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+define float @foosub(ptr nocapture %A, ptr nocapture %n) nounwind uwtable readonly ssp {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %sum.04 = phi float [ 0.000000e+00, %entry ], [ %sub, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %sub = fsub fast float %sum.04, %0
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -48,15 +48,15 @@ for.end:                                          ; preds = %for.body
 ;CHECK-LABEL: @foodiv(
 ;CHECK: fdiv fast <4 x float>
 ;CHECK: ret
-define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+define float @foodiv(ptr nocapture %A, ptr nocapture %n) nounwind uwtable readonly ssp {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %sub = fdiv fast float %sum.04, %0
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -70,15 +70,15 @@ for.end:                                          ; preds = %for.body
 ;CHECK-LABEL: @foonodiv(
 ;CHECK-NOT: fdiv fast <4 x float>
 ;CHECK: ret
-define float @foonodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+define float @foonodiv(ptr nocapture %A, ptr nocapture %n) nounwind uwtable readonly ssp {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %sub = fdiv fast float %0, %sum.04
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32

diff --git a/llvm/test/Transforms/LoopVectorize/fneg.ll b/llvm/test/Transforms/LoopVectorize/fneg.ll
index bc8906e35e0f1..2b2627a100939 100644
--- a/llvm/test/Transforms/LoopVectorize/fneg.ll
+++ b/llvm/test/Transforms/LoopVectorize/fneg.ll
@@ -1,20 +1,20 @@
 ; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
 
-define void @foo(float* %a, i64 %n) {
+define void @foo(ptr %a, i64 %n) {
 ; CHECK:       vector.body:
-; CHECK:         [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 4
+; CHECK:         [[WIDE_LOAD:%.*]] = load <4 x float>, ptr {{.*}}, align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg <4 x float> [[WIDE_LOAD]]
-; CHECK:         store <4 x float> [[TMP4]], <4 x float>* {{.*}}, align 4
+; CHECK:         store <4 x float> [[TMP4]], ptr {{.*}}, align 4
 ;
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %sub = fneg float %0
-  store float %sub, float* %arrayidx, align 4
+  store float %sub, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %cmp = icmp eq i64 %indvars.iv.next, %n
   br i1 %cmp, label %for.exit, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/followup.ll b/llvm/test/Transforms/LoopVectorize/followup.ll
index 56f093aa2b103..19db210e83d8b 100644
--- a/llvm/test/Transforms/LoopVectorize/followup.ll
+++ b/llvm/test/Transforms/LoopVectorize/followup.ll
@@ -4,16 +4,16 @@
 ;
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-define void @followup(i32* nocapture %a, i32 %n) {
+define void @followup(ptr nocapture %a, i32 %n) {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
   %0 = trunc i64 %indvars.iv to i32
-  store i32 %0, i32* %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff --git a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
index dccf338d600a2..dc795eec4f5f8 100644
--- a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
+++ b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 ;;;; Derived from the following C code
-;; void forked_ptrs_different_base_same_offset(float *A, float *B, float *C, int *D) {
+;; void forked_ptrs_different_base_same_offset(ptr A, ptr B, ptr C, int *D) {
 ;;   for (int i=0; i<100; i++) {
 ;;     if (D[i] != 0) {
 ;;       C[i] = A[i];
@@ -14,18 +14,18 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 ;;   }
 ;; }
 
-define dso_local void @forked_ptrs_different_base_same_offset(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) {
+define dso_local void @forked_ptrs_different_base_same_offset(ptr nocapture readonly %Base1, ptr nocapture readonly %Base2, ptr nocapture %Dest, ptr nocapture readonly %Preds) {
 ; CHECK-LABEL: @forked_ptrs_different_base_same_offset(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BASE1_FR:%.*]] = freeze float* [[BASE1:%.*]]
-; CHECK-NEXT:    [[BASE2_FR:%.*]] = freeze float* [[BASE2:%.*]]
-; CHECK-NEXT:    [[DEST_FR:%.*]] = freeze float* [[DEST:%.*]]
+; CHECK-NEXT:    [[BASE1_FR:%.*]] = freeze ptr [[BASE1:%.*]]
+; CHECK-NEXT:    [[BASE2_FR:%.*]] = freeze ptr [[BASE2:%.*]]
+; CHECK-NEXT:    [[DEST_FR:%.*]] = freeze ptr [[DEST:%.*]]
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK:       vector.memcheck:
-; CHECK-NEXT:    [[DEST1:%.*]] = ptrtoint float* [[DEST_FR]] to i64
-; CHECK-NEXT:    [[PREDS2:%.*]] = ptrtoint i32* [[PREDS:%.*]] to i64
-; CHECK-NEXT:    [[BASE23:%.*]] = ptrtoint float* [[BASE2_FR]] to i64
-; CHECK-NEXT:    [[BASE15:%.*]] = ptrtoint float* [[BASE1_FR]] to i64
+; CHECK-NEXT:    [[DEST1:%.*]] = ptrtoint ptr [[DEST_FR]] to i64
+; CHECK-NEXT:    [[PREDS2:%.*]] = ptrtoint ptr [[PREDS:%.*]] to i64
+; CHECK-NEXT:    [[BASE23:%.*]] = ptrtoint ptr [[BASE2_FR]] to i64
+; CHECK-NEXT:    [[BASE15:%.*]] = ptrtoint ptr [[BASE1_FR]] to i64
 ; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[DEST1]], [[PREDS2]]
 ; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[DEST1]], [[BASE23]]
@@ -36,40 +36,38 @@ define dso_local void @forked_ptrs_different_base_same_offset(float* nocapture r
 ; CHECK-NEXT:    [[CONFLICT_RDX8:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK7]]
 ; CHECK-NEXT:    br i1 [[CONFLICT_RDX8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float*> poison, float* [[BASE2_FR]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float*> [[BROADCAST_SPLATINSERT]], <4 x float*> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x float*> poison, float* [[BASE1_FR]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x float*> [[BROADCAST_SPLATINSERT9]], <4 x float*> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[BASE2_FR]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x ptr> poison, ptr [[BASE1_FR]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT9]], <4 x ptr> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[PREDS]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x float*> [[BROADCAST_SPLAT]], <4 x float*> [[BROADCAST_SPLAT10]]
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float*> [[TMP9]], i64 0
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP10]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float*> [[TMP9]], i64 1
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float*> [[TMP9]], i64 2
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x float*> [[TMP9]], i64 3
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP18:%.*]] = load float, float* [[TMP11]], align 4
-; CHECK-NEXT:    [[TMP19:%.*]] = load float, float* [[TMP13]], align 4
-; CHECK-NEXT:    [[TMP20:%.*]] = load float, float* [[TMP15]], align 4
-; CHECK-NEXT:    [[TMP21:%.*]] = load float, float* [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x ptr> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT10]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP9]], i64 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x ptr> [[TMP9]], i64 1
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x ptr> [[TMP9]], i64 2
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x ptr> [[TMP9]], i64 3
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP11]], align 4
+; CHECK-NEXT:    [[TMP19:%.*]] = load float, ptr [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = load float, ptr [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP21:%.*]] = load float, ptr [[TMP17]], align 4
 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> poison, float [[TMP18]], i64 0
 ; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x float> [[TMP22]], float [[TMP19]], i64 1
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP20]], i64 2
 ; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP21]], i64 3
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, float* [[DEST_FR]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP27:%.*]] = bitcast float* [[TMP26]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP25]], <4 x float>* [[TMP27]], align 4
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, ptr [[DEST_FR]], i64 [[INDEX]]
+; CHECK-NEXT:    store <4 x float> [[TMP25]], ptr [[TMP26]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -82,14 +80,14 @@ define dso_local void @forked_ptrs_different_base_same_offset(float* nocapture r
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PREDS]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP29]], 0
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], float* [[BASE2_FR]], float* [[BASE1_FR]]
-; CHECK-NEXT:    [[DOTSINK_IN:%.*]] = getelementptr inbounds float, float* [[SPEC_SELECT]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[DOTSINK:%.*]] = load float, float* [[DOTSINK_IN]], align 4
-; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, float* [[DEST_FR]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store float [[DOTSINK]], float* [[TMP30]], align 4
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], ptr [[BASE2_FR]], ptr [[BASE1_FR]]
+; CHECK-NEXT:    [[DOTSINK_IN:%.*]] = getelementptr inbounds float, ptr [[SPEC_SELECT]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[DOTSINK:%.*]] = load float, ptr [[DOTSINK_IN]], align 4
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, ptr [[DEST_FR]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[DOTSINK]], ptr [[TMP30]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -102,21 +100,21 @@ for.cond.cleanup:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1.not = icmp eq i32 %0, 0
-  %spec.select = select i1 %cmp1.not, float* %Base2, float* %Base1
-  %.sink.in = getelementptr inbounds float, float* %spec.select, i64 %indvars.iv
-  %.sink = load float, float* %.sink.in, align 4
-  %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
-  store float %.sink, float* %1, align 4
+  %spec.select = select i1 %cmp1.not, ptr %Base2, ptr %Base1
+  %.sink.in = getelementptr inbounds float, ptr %spec.select, i64 %indvars.iv
+  %.sink = load float, ptr %.sink.in, align 4
+  %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv
+  store float %.sink, ptr %1, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 100
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
 ;;;; Derived from the following C code
-;; void forked_ptrs_same_base_different_offset(float *A, float *B, int *C) {
+;; void forked_ptrs_same_base_different_offset(ptr A, ptr B, int *C) {
 ;;   int offset;
 ;;   for (int i = 0; i < 100; i++) {
 ;;     if (C[i] != 0)
@@ -127,7 +125,7 @@ for.body:
 ;;   }
 ;; }
 
-define dso_local void @forked_ptrs_same_base_different_offset(float* nocapture readonly %Base, float* nocapture %Dest, i32* nocapture readonly %Preds) {
+define dso_local void @forked_ptrs_same_base_different_offset(ptr nocapture readonly %Base, ptr nocapture %Dest, ptr nocapture readonly %Preds) {
 ; CHECK-LABEL: @forked_ptrs_same_base_different_offset(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -136,18 +134,18 @@ define dso_local void @forked_ptrs_same_base_different_offset(float* nocapture r
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[I_014:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PREDS:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_014]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; CHECK-NEXT:    [[OFFSET_0:%.*]] = select i1 [[CMP1_NOT]], i32 [[ADD]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[IDXPROM213:%.*]] = zext i32 [[OFFSET_0]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[BASE:%.*]], i64 [[IDXPROM213]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX3]], align 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store float [[TMP2]], float* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[BASE:%.*]], i64 [[IDXPROM213]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[DEST:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[TMP2]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
 ;
@@ -160,18 +158,18 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %i.014 = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1.not = icmp eq i32 %0, 0
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %add = add nuw nsw i32 %i.014, 1
   %1 = trunc i64 %indvars.iv to i32
   %offset.0 = select i1 %cmp1.not, i32 %add, i32 %1
   %idxprom213 = zext i32 %offset.0 to i64
-  %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213
-  %2 = load float, float* %arrayidx3, align 4
-  %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
-  store float %2, float* %arrayidx5, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %Base, i64 %idxprom213
+  %2 = load float, ptr %arrayidx3, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv
+  store float %2, ptr %arrayidx5, align 4
   %exitcond.not = icmp eq i64 %indvars.iv.next, 100
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }

diff --git a/llvm/test/Transforms/LoopVectorize/funcall.ll b/llvm/test/Transforms/LoopVectorize/funcall.ll
index c5d3066151d07..c4e61241c1a85 100644
--- a/llvm/test/Transforms/LoopVectorize/funcall.ll
+++ b/llvm/test/Transforms/LoopVectorize/funcall.ll
@@ -10,16 +10,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK-LABEL: @test(
 ; CHECK: <2 x double>
 
-define void @test(double* %d, double %t) {
+define void @test(ptr %d, double %t) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %d, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %d, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %1 = tail call double @llvm.pow.f64(double %0, double %t)
-  store double %1, double* %arrayidx, align 8
+  store double %1, ptr %arrayidx, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 128

diff --git a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll
index 7f8abf3f0b82a..0b75751836810 100644
--- a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -43,13 +43,13 @@ define void @example1() nounwind uwtable ssp {
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %5, %3
-  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %6, i32* %7, align 4
+  %7 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %6, ptr %7, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 256
@@ -83,8 +83,8 @@ define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
 
 .lr.ph5:                                          ; preds = %0, %.lr.ph5
   %indvars.iv6 = phi i64 [ %indvars.iv.next7, %.lr.ph5 ], [ 0, %0 ]
-  %3 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv6
-  store i32 %x, i32* %3, align 4
+  %3 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv6
+  store i32 %x, ptr %3, align 4
   %indvars.iv.next7 = add i64 %indvars.iv6, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next7 to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -94,13 +94,13 @@ define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ %i.0.lcssa, %.preheader ]
   %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ]
   %4 = add nsw i32 %.02, -1
-  %5 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %6 = load i32, i32* %5, align 4
-  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %8 = load i32, i32* %7, align 4
+  %5 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %6 = load i32, ptr %5, align 4
+  %7 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %indvars.iv
+  %8 = load i32, ptr %7, align 4
   %9 = and i32 %8, %6
-  %10 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %9, i32* %10, align 4
+  %10 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %9, ptr %10, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %11 = icmp eq i32 %4, 0
   br i1 %11, label %._crit_edge, label %.lr.ph
@@ -118,19 +118,19 @@ define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
 ;UNROLL: <4 x i32>
 ;UNROLL: <4 x i32>
 ;UNROLL: ret void
-define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
+define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q) nounwind uwtable ssp {
   %1 = icmp eq i32 %n, 0
   br i1 %1, label %._crit_edge, label %.lr.ph
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ]
-  %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
-  %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
+  %.014 = phi ptr [ %5, %.lr.ph ], [ %p, %0 ]
+  %.023 = phi ptr [ %3, %.lr.ph ], [ %q, %0 ]
   %2 = add nsw i32 %.05, -1
-  %3 = getelementptr inbounds i32, i32* %.023, i64 1
-  %4 = load i32, i32* %.023, align 16
-  %5 = getelementptr inbounds i32, i32* %.014, i64 1
-  store i32 %4, i32* %.014, align 16
+  %3 = getelementptr inbounds i32, ptr %.023, i64 1
+  %4 = load i32, ptr %.023, align 16
+  %5 = getelementptr inbounds i32, ptr %.014, i64 1
+  store i32 %4, ptr %.014, align 16
   %6 = icmp eq i32 %2, 0
   br i1 %6, label %._crit_edge, label %.lr.ph
 
@@ -147,7 +147,7 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
 ;UNROLL: load <4 x i32>
 ;UNROLL: load <4 x i32>
 ;UNROLL: ret void
-define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
+define void @example4(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q) nounwind uwtable ssp {
   %1 = add nsw i32 %n, -1
   %2 = icmp eq i32 %n, 0
   br i1 %2, label %.preheader4, label %.lr.ph10
@@ -158,13 +158,13 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
 
 .lr.ph10:                                         ; preds = %0, %.lr.ph10
   %4 = phi i32 [ %9, %.lr.ph10 ], [ %1, %0 ]
-  %.018 = phi i32* [ %8, %.lr.ph10 ], [ %p, %0 ]
-  %.027 = phi i32* [ %5, %.lr.ph10 ], [ %q, %0 ]
-  %5 = getelementptr inbounds i32, i32* %.027, i64 1
-  %6 = load i32, i32* %.027, align 16
+  %.018 = phi ptr [ %8, %.lr.ph10 ], [ %p, %0 ]
+  %.027 = phi ptr [ %5, %.lr.ph10 ], [ %q, %0 ]
+  %5 = getelementptr inbounds i32, ptr %.027, i64 1
+  %6 = load i32, ptr %.027, align 16
   %7 = add nsw i32 %6, 5
-  %8 = getelementptr inbounds i32, i32* %.018, i64 1
-  store i32 %7, i32* %.018, align 16
+  %8 = getelementptr inbounds i32, ptr %.018, i64 1
+  store i32 %7, ptr %.018, align 16
   %9 = add nsw i32 %4, -1
   %10 = icmp eq i32 %4, 0
   br i1 %10, label %._crit_edge, label %.lr.ph10
@@ -175,26 +175,26 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
 .lr.ph6:                                          ; preds = %.preheader4, %.lr.ph6
   %indvars.iv11 = phi i64 [ %indvars.iv.next12, %.lr.ph6 ], [ 0, %.preheader4 ]
   %indvars.iv.next12 = add i64 %indvars.iv11, 1
-  %11 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv.next12
-  %12 = load i32, i32* %11, align 4
+  %11 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv.next12
+  %12 = load i32, ptr %11, align 4
   %13 = add nsw i64 %indvars.iv11, 3
-  %14 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %13
-  %15 = load i32, i32* %14, align 4
+  %14 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %13
+  %15 = load i32, ptr %14, align 4
   %16 = add nsw i32 %15, %12
-  %17 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv11
-  store i32 %16, i32* %17, align 4
+  %17 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv11
+  store i32 %16, ptr %17, align 4
   %lftr.wideiv13 = trunc i64 %indvars.iv.next12 to i32
   %exitcond14 = icmp eq i32 %lftr.wideiv13, %1
   br i1 %exitcond14, label %.preheader, label %.lr.ph6
 
 .lr.ph:                                           ; preds = %.preheader, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.preheader ]
-  %18 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  %19 = load i32, i32* %18, align 4
+  %18 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %19 = load i32, ptr %18, align 4
   %20 = icmp sgt i32 %19, 4
   %21 = select i1 %20, i32 4, i32 0
-  %22 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  store i32 %21, i32* %22, align 4
+  %22 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv
+  store i32 %21, ptr %22, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %1
@@ -222,8 +222,8 @@ define void @example8(i32 %x) nounwind uwtable ssp {
 
 ; <label>:1                                       ; preds = %1, %.preheader
   %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [32 x [1024 x i32]], [32 x [1024 x i32]]* @G, i64 0, i64 %indvars.iv3, i64 %indvars.iv
-  store i32 %x, i32* %2, align 4
+  %2 = getelementptr inbounds [32 x [1024 x i32]], ptr @G, i64 0, i64 %indvars.iv3, i64 %indvars.iv
+  store i32 %x, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024
@@ -248,10 +248,10 @@ define i32 @example9() nounwind uwtable readonly ssp {
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %diff.01 = phi i32 [ 0, %0 ], [ %7, %1 ]
-  %2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @ub, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds [1024 x i32], ptr @ub, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds [1024 x i32], ptr @uc, i64 0, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = add i32 %3, %diff.01
   %7 = sub i32 %6, %5
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -270,25 +270,25 @@ define i32 @example9() nounwind uwtable readonly ssp {
 ;CHECK: add <4 x i16>
 ;CHECK: store <4 x i16>
 ;CHECK: ret void
-define void @example10a(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+define void @example10a(ptr noalias nocapture %sa, ptr noalias nocapture %sb, ptr noalias nocapture %sc, ptr noalias nocapture %ia, ptr noalias nocapture %ib, ptr noalias nocapture %ic) nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds i32, i32* %ib, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds i32, i32* %ic, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds i32, ptr %ib, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds i32, ptr %ic, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %5, %3
-  %7 = getelementptr inbounds i32, i32* %ia, i64 %indvars.iv
-  store i32 %6, i32* %7, align 4
-  %8 = getelementptr inbounds i16, i16* %sb, i64 %indvars.iv
-  %9 = load i16, i16* %8, align 2
-  %10 = getelementptr inbounds i16, i16* %sc, i64 %indvars.iv
-  %11 = load i16, i16* %10, align 2
+  %7 = getelementptr inbounds i32, ptr %ia, i64 %indvars.iv
+  store i32 %6, ptr %7, align 4
+  %8 = getelementptr inbounds i16, ptr %sb, i64 %indvars.iv
+  %9 = load i16, ptr %8, align 2
+  %10 = getelementptr inbounds i16, ptr %sc, i64 %indvars.iv
+  %11 = load i16, ptr %10, align 2
   %12 = add i16 %11, %9
-  %13 = getelementptr inbounds i16, i16* %sa, i64 %indvars.iv
-  store i16 %12, i16* %13, align 2
+  %13 = getelementptr inbounds i16, ptr %sa, i64 %indvars.iv
+  store i16 %12, ptr %13, align 2
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024
@@ -303,16 +303,16 @@ define void @example10a(i16* noalias nocapture %sa, i16* noalias nocapture %sb,
 ;CHECK: sext <4 x i16>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
-define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+define void @example10b(ptr noalias nocapture %sa, ptr noalias nocapture %sb, ptr noalias nocapture %sc, ptr noalias nocapture %ia, ptr noalias nocapture %ib, ptr noalias nocapture %ic) nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds i16, i16* %sb, i64 %indvars.iv
-  %3 = load i16, i16* %2, align 2
+  %2 = getelementptr inbounds i16, ptr %sb, i64 %indvars.iv
+  %3 = load i16, ptr %2, align 2
   %4 = sext i16 %3 to i32
-  %5 = getelementptr inbounds i32, i32* %ia, i64 %indvars.iv
-  store i32 %4, i32* %5, align 4
+  %5 = getelementptr inbounds i32, ptr %ia, i64 %indvars.iv
+  store i32 %4, ptr %5, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024
@@ -339,24 +339,24 @@ define void @example11() nounwind uwtable ssp {
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = shl nsw i64 %indvars.iv, 1
   %3 = or i64 %2, 1
-  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %3
-  %5 = load i32, i32* %4, align 4
-  %6 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %3
-  %7 = load i32, i32* %6, align 4
+  %4 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %3
+  %5 = load i32, ptr %4, align 4
+  %6 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %3
+  %7 = load i32, ptr %6, align 4
   %8 = mul nsw i32 %7, %5
-  %9 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %2
-  %10 = load i32, i32* %9, align 8
-  %11 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %2
-  %12 = load i32, i32* %11, align 8
+  %9 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %2
+  %10 = load i32, ptr %9, align 8
+  %11 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %2
+  %12 = load i32, ptr %11, align 8
   %13 = mul nsw i32 %12, %10
   %14 = sub nsw i32 %8, %13
-  %15 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %14, i32* %15, align 4
+  %15 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %14, ptr %15, align 4
   %16 = mul nsw i32 %7, %10
   %17 = mul nsw i32 %12, %5
   %18 = add nsw i32 %17, %16
-  %19 = getelementptr inbounds [2048 x i32], [2048 x i32]* @d, i64 0, i64 %indvars.iv
-  store i32 %18, i32* %19, align 4
+  %19 = getelementptr inbounds [2048 x i32], ptr @d, i64 0, i64 %indvars.iv
+  store i32 %18, ptr %19, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
@@ -376,9 +376,9 @@ define void @example12() {
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %2 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
   %3 = trunc i64 %indvars.iv to i32
-  store i32 %3, i32* %2, align 4
+  store i32 %3, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024
@@ -391,24 +391,24 @@ define void @example12() {
 ;CHECK-LABEL: @example13(
 ;CHECK: <4 x i32>
 ;CHECK: ret void
-define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp {
+define void @example13(ptr nocapture %A, ptr nocapture %B, ptr nocapture %out) nounwind uwtable ssp {
   br label %.preheader
 
 .preheader:                                       ; preds = %14, %0
   %indvars.iv4 = phi i64 [ 0, %0 ], [ %indvars.iv.next5, %14 ]
-  %1 = getelementptr inbounds i32*, i32** %A, i64 %indvars.iv4
-  %2 = load i32*, i32** %1, align 8
-  %3 = getelementptr inbounds i32*, i32** %B, i64 %indvars.iv4
-  %4 = load i32*, i32** %3, align 8
+  %1 = getelementptr inbounds ptr, ptr %A, i64 %indvars.iv4
+  %2 = load ptr, ptr %1, align 8
+  %3 = getelementptr inbounds ptr, ptr %B, i64 %indvars.iv4
+  %4 = load ptr, ptr %3, align 8
   br label %5
 
 ; <label>:5                                       ; preds = %.preheader, %5
   %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %5 ]
   %diff.02 = phi i32 [ 0, %.preheader ], [ %11, %5 ]
-  %6 = getelementptr inbounds i32, i32* %2, i64 %indvars.iv
-  %7 = load i32, i32* %6, align 4
-  %8 = getelementptr inbounds i32, i32* %4, i64 %indvars.iv
-  %9 = load i32, i32* %8, align 4
+  %6 = getelementptr inbounds i32, ptr %2, i64 %indvars.iv
+  %7 = load i32, ptr %6, align 4
+  %8 = getelementptr inbounds i32, ptr %4, i64 %indvars.iv
+  %9 = load i32, ptr %8, align 4
   %10 = add i32 %7, %diff.02
   %11 = sub i32 %10, %9
   %indvars.iv.next = add i64 %indvars.iv, 8
@@ -417,8 +417,8 @@ define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %o
   br i1 %13, label %5, label %14
 
 ; <label>:14                                      ; preds = %5
-  %15 = getelementptr inbounds i32, i32* %out, i64 %indvars.iv4
-  store i32 %11, i32* %15, align 4
+  %15 = getelementptr inbounds i32, ptr %out, i64 %indvars.iv4
+  store i32 %11, ptr %15, align 4
   %indvars.iv.next5 = add i64 %indvars.iv4, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next5 to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 32
@@ -432,7 +432,7 @@ define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %o
 ;CHECK-LABEL: @example14(
 ;CHECK: <4 x i32>
 ;CHECK: ret void
-define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocapture %out) nounwind uwtable ssp {
+define void @example14(ptr nocapture %in, ptr nocapture %coeff, ptr nocapture %out) nounwind uwtable ssp {
 .preheader3:
   br label %.preheader
 
@@ -444,14 +444,14 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
 ; <label>:0                                       ; preds = %0, %.preheader
   %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %0 ]
   %sum.12 = phi i32 [ %sum.05, %.preheader ], [ %10, %0 ]
-  %1 = getelementptr inbounds i32*, i32** %in, i64 %indvars.iv
-  %2 = load i32*, i32** %1, align 8
-  %3 = getelementptr inbounds i32, i32* %2, i64 %indvars.iv7
-  %4 = load i32, i32* %3, align 4
-  %5 = getelementptr inbounds i32*, i32** %coeff, i64 %indvars.iv
-  %6 = load i32*, i32** %5, align 8
-  %7 = getelementptr inbounds i32, i32* %6, i64 %indvars.iv7
-  %8 = load i32, i32* %7, align 4
+  %1 = getelementptr inbounds ptr, ptr %in, i64 %indvars.iv
+  %2 = load ptr, ptr %1, align 8
+  %3 = getelementptr inbounds i32, ptr %2, i64 %indvars.iv7
+  %4 = load i32, ptr %3, align 4
+  %5 = getelementptr inbounds ptr, ptr %coeff, i64 %indvars.iv
+  %6 = load ptr, ptr %5, align 8
+  %7 = getelementptr inbounds i32, ptr %6, i64 %indvars.iv7
+  %8 = load i32, ptr %7, align 4
   %9 = mul nsw i32 %8, %4
   %10 = add nsw i32 %9, %sum.12
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -466,7 +466,7 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
   br i1 %exitcond10, label %.preheader3.1, label %.preheader
 
 .preheader3.1:                                    ; preds = %11
-  store i32 %10, i32* %out, align 4
+  store i32 %10, ptr %out, align 4
   br label %.preheader.1
 
 .preheader.1:                                     ; preds = %24, %.preheader3.1
@@ -478,14 +478,14 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
   %indvars.iv.1 = phi i64 [ 0, %.preheader.1 ], [ %13, %12 ]
   %sum.12.1 = phi i32 [ %sum.05.1, %.preheader.1 ], [ %23, %12 ]
   %13 = add nsw i64 %indvars.iv.1, 1
-  %14 = getelementptr inbounds i32*, i32** %in, i64 %13
-  %15 = load i32*, i32** %14, align 8
-  %16 = getelementptr inbounds i32, i32* %15, i64 %indvars.iv7.1
-  %17 = load i32, i32* %16, align 4
-  %18 = getelementptr inbounds i32*, i32** %coeff, i64 %indvars.iv.1
-  %19 = load i32*, i32** %18, align 8
-  %20 = getelementptr inbounds i32, i32* %19, i64 %indvars.iv7.1
-  %21 = load i32, i32* %20, align 4
+  %14 = getelementptr inbounds ptr, ptr %in, i64 %13
+  %15 = load ptr, ptr %14, align 8
+  %16 = getelementptr inbounds i32, ptr %15, i64 %indvars.iv7.1
+  %17 = load i32, ptr %16, align 4
+  %18 = getelementptr inbounds ptr, ptr %coeff, i64 %indvars.iv.1
+  %19 = load ptr, ptr %18, align 8
+  %20 = getelementptr inbounds i32, ptr %19, i64 %indvars.iv7.1
+  %21 = load i32, ptr %20, align 4
   %22 = mul nsw i32 %21, %17
   %23 = add nsw i32 %22, %sum.12.1
   %lftr.wideiv.1 = trunc i64 %13 to i32
@@ -499,8 +499,8 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
   br i1 %exitcond10.1, label %.preheader3.2, label %.preheader.1
 
 .preheader3.2:                                    ; preds = %24
-  %25 = getelementptr inbounds i32, i32* %out, i64 1
-  store i32 %23, i32* %25, align 4
+  %25 = getelementptr inbounds i32, ptr %out, i64 1
+  store i32 %23, ptr %25, align 4
   br label %.preheader.2
 
 .preheader.2:                                     ; preds = %38, %.preheader3.2
@@ -512,14 +512,14 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
   %indvars.iv.2 = phi i64 [ 0, %.preheader.2 ], [ %indvars.iv.next.2, %26 ]
   %sum.12.2 = phi i32 [ %sum.05.2, %.preheader.2 ], [ %37, %26 ]
   %27 = add nsw i64 %indvars.iv.2, 2
-  %28 = getelementptr inbounds i32*, i32** %in, i64 %27
-  %29 = load i32*, i32** %28, align 8
-  %30 = getelementptr inbounds i32, i32* %29, i64 %indvars.iv7.2
-  %31 = load i32, i32* %30, align 4
-  %32 = getelementptr inbounds i32*, i32** %coeff, i64 %indvars.iv.2
-  %33 = load i32*, i32** %32, align 8
-  %34 = getelementptr inbounds i32, i32* %33, i64 %indvars.iv7.2
-  %35 = load i32, i32* %34, align 4
+  %28 = getelementptr inbounds ptr, ptr %in, i64 %27
+  %29 = load ptr, ptr %28, align 8
+  %30 = getelementptr inbounds i32, ptr %29, i64 %indvars.iv7.2
+  %31 = load i32, ptr %30, align 4
+  %32 = getelementptr inbounds ptr, ptr %coeff, i64 %indvars.iv.2
+  %33 = load ptr, ptr %32, align 8
+  %34 = getelementptr inbounds i32, ptr %33, i64 %indvars.iv7.2
+  %35 = load i32, ptr %34, align 4
   %36 = mul nsw i32 %35, %31
   %37 = add nsw i32 %36, %sum.12.2
   %indvars.iv.next.2 = add i64 %indvars.iv.2, 1
@@ -534,8 +534,8 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
   br i1 %exitcond10.2, label %.preheader3.3, label %.preheader.2
 
 .preheader3.3:                                    ; preds = %38
-  %39 = getelementptr inbounds i32, i32* %out, i64 2
-  store i32 %37, i32* %39, align 4
+  %39 = getelementptr inbounds i32, ptr %out, i64 2
+  store i32 %37, ptr %39, align 4
   br label %.preheader.3
 
 .preheader.3:                                     ; preds = %52, %.preheader3.3
@@ -547,14 +547,14 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
   %indvars.iv.3 = phi i64 [ 0, %.preheader.3 ], [ %indvars.iv.next.3, %40 ]
   %sum.12.3 = phi i32 [ %sum.05.3, %.preheader.3 ], [ %51, %40 ]
   %41 = add nsw i64 %indvars.iv.3, 3
-  %42 = getelementptr inbounds i32*, i32** %in, i64 %41
-  %43 = load i32*, i32** %42, align 8
-  %44 = getelementptr inbounds i32, i32* %43, i64 %indvars.iv7.3
-  %45 = load i32, i32* %44, align 4
-  %46 = getelementptr inbounds i32*, i32** %coeff, i64 %indvars.iv.3
-  %47 = load i32*, i32** %46, align 8
-  %48 = getelementptr inbounds i32, i32* %47, i64 %indvars.iv7.3
-  %49 = load i32, i32* %48, align 4
+  %42 = getelementptr inbounds ptr, ptr %in, i64 %41
+  %43 = load ptr, ptr %42, align 8
+  %44 = getelementptr inbounds i32, ptr %43, i64 %indvars.iv7.3
+  %45 = load i32, ptr %44, align 4
+  %46 = getelementptr inbounds ptr, ptr %coeff, i64 %indvars.iv.3
+  %47 = load ptr, ptr %46, align 8
+  %48 = getelementptr inbounds i32, ptr %47, i64 %indvars.iv7.3
+  %49 = load i32, ptr %48, align 4
   %50 = mul nsw i32 %49, %45
   %51 = add nsw i32 %50, %sum.12.3
   %indvars.iv.next.3 = add i64 %indvars.iv.3, 1
@@ -569,8 +569,8 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
   br i1 %exitcond10.3, label %53, label %.preheader.3
 
 ; <label>:53                                      ; preds = %52
-  %54 = getelementptr inbounds i32, i32* %out, i64 3
-  store i32 %51, i32* %54, align 4
+  %54 = getelementptr inbounds i32, ptr %out, i64 3
+  store i32 %51, ptr %54, align 4
   ret void
 }
 
@@ -578,7 +578,7 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
 ;CHECK: load <4 x i32>
 ;CHECK: shufflevector {{.*}} <i32 3, i32 2, i32 1, i32 0>
 ;CHECK: ret i32
-define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
+define i32 @example21(ptr nocapture %b, i32 %n) nounwind uwtable readonly ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
@@ -590,8 +590,8 @@ define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
   %indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ]
   %a.02 = phi i32 [ 0, %.lr.ph ], [ %6, %3 ]
   %indvars.iv.next = add i64 %indvars.iv, -1
-  %4 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
-  %5 = load i32, i32* %4, align 4
+  %4 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv.next
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %5, %a.02
   %7 = trunc i64 %indvars.iv.next to i32
   %8 = icmp sgt i32 %7, 0
@@ -605,19 +605,19 @@ define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
 ;CHECK-LABEL: @example23(
 ;CHECK: <4 x i32>
 ;CHECK: ret void
-define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp {
+define void @example23(ptr nocapture %src, ptr nocapture %dst) nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %1, %0
-  %.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
-  %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
+  %.04 = phi ptr [ %src, %0 ], [ %2, %1 ]
+  %.013 = phi ptr [ %dst, %0 ], [ %6, %1 ]
   %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
-  %2 = getelementptr inbounds i16, i16* %.04, i64 1
-  %3 = load i16, i16* %.04, align 2
+  %2 = getelementptr inbounds i16, ptr %.04, i64 1
+  %3 = load i16, ptr %.04, align 2
   %4 = zext i16 %3 to i32
   %5 = shl nuw nsw i32 %4, 7
-  %6 = getelementptr inbounds i32, i32* %.013, i64 1
-  store i32 %5, i32* %.013, align 4
+  %6 = getelementptr inbounds i32, ptr %.013, i64 1
+  store i32 %5, ptr %.013, align 4
   %7 = add nsw i32 %i.02, 1
   %exitcond = icmp eq i32 %7, 256
   br i1 %exitcond, label %8, label %1
@@ -634,15 +634,15 @@ define void @example24(i16 signext %x, i16 signext %y) nounwind uwtable ssp {
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [1024 x float], [1024 x float]* @fa, i64 0, i64 %indvars.iv
-  %3 = load float, float* %2, align 4
-  %4 = getelementptr inbounds [1024 x float], [1024 x float]* @fb, i64 0, i64 %indvars.iv
-  %5 = load float, float* %4, align 4
+  %2 = getelementptr inbounds [1024 x float], ptr @fa, i64 0, i64 %indvars.iv
+  %3 = load float, ptr %2, align 4
+  %4 = getelementptr inbounds [1024 x float], ptr @fb, i64 0, i64 %indvars.iv
+  %5 = load float, ptr %4, align 4
   %6 = fcmp olt float %3, %5
   %x.y = select i1 %6, i16 %x, i16 %y
   %7 = sext i16 %x.y to i32
-  %8 = getelementptr inbounds [1024 x i32], [1024 x i32]* @ic, i64 0, i64 %indvars.iv
-  store i32 %7, i32* %8, align 4
+  %8 = getelementptr inbounds [1024 x i32], ptr @ic, i64 0, i64 %indvars.iv
+  store i32 %7, ptr %8, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024
@@ -661,20 +661,20 @@ define void @example25() nounwind uwtable ssp {
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [1024 x float], [1024 x float]* @da, i64 0, i64 %indvars.iv
-  %3 = load float, float* %2, align 4
-  %4 = getelementptr inbounds [1024 x float], [1024 x float]* @db, i64 0, i64 %indvars.iv
-  %5 = load float, float* %4, align 4
+  %2 = getelementptr inbounds [1024 x float], ptr @da, i64 0, i64 %indvars.iv
+  %3 = load float, ptr %2, align 4
+  %4 = getelementptr inbounds [1024 x float], ptr @db, i64 0, i64 %indvars.iv
+  %5 = load float, ptr %4, align 4
   %6 = fcmp olt float %3, %5
-  %7 = getelementptr inbounds [1024 x float], [1024 x float]* @dc, i64 0, i64 %indvars.iv
-  %8 = load float, float* %7, align 4
-  %9 = getelementptr inbounds [1024 x float], [1024 x float]* @dd, i64 0, i64 %indvars.iv
-  %10 = load float, float* %9, align 4
+  %7 = getelementptr inbounds [1024 x float], ptr @dc, i64 0, i64 %indvars.iv
+  %8 = load float, ptr %7, align 4
+  %9 = getelementptr inbounds [1024 x float], ptr @dd, i64 0, i64 %indvars.iv
+  %10 = load float, ptr %9, align 4
   %11 = fcmp olt float %8, %10
   %12 = and i1 %6, %11
   %13 = zext i1 %12 to i32
-  %14 = getelementptr inbounds [1024 x i32], [1024 x i32]* @dj, i64 0, i64 %indvars.iv
-  store i32 %13, i32* %14, align 4
+  %14 = getelementptr inbounds [1024 x i32], ptr @dj, i64 0, i64 %indvars.iv
+  store i32 %13, ptr %14, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024
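All the hunks above are the same mechanical rewrite: only the pointer operand's type is respelled as ptr, while the getelementptr keeps its explicit source element type, so the address computation and the loaded/stored value types are untouched. A minimal, self-contained sketch of the pattern (the names @arr, %i and @load_arr are illustrative, not taken from this test):

@arr = global [2048 x i32] zeroinitializer, align 16

define i32 @load_arr(i64 %i) {
  ; was: %gep = getelementptr inbounds [2048 x i32], [2048 x i32]* @arr, i64 0, i64 %i
  ;      %val = load i32, i32* %gep, align 4
  %gep = getelementptr inbounds [2048 x i32], ptr @arr, i64 0, i64 %i
  %val = load i32, ptr %gep, align 4
  ret i32 %val
}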

diff --git a/llvm/test/Transforms/LoopVectorize/gep_with_bitcast.ll b/llvm/test/Transforms/LoopVectorize/gep_with_bitcast.ll
index 33cacb390d792..a6c9aadaac36c 100644
--- a/llvm/test/Transforms/LoopVectorize/gep_with_bitcast.ll
+++ b/llvm/test/Transforms/LoopVectorize/gep_with_bitcast.ll
@@ -4,7 +4,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 
 ; Vectorization of loop with bitcast between GEP and load
 ; Simplified source code:
-;void foo (double** __restrict__  in, bool * __restrict__ res) {
+;void foo (ptr __restrict__  in, bool * __restrict__ res) {
 ;
 ;  for (int i = 0; i < 4096; ++i)
 ;    res[i] = ((unsigned long long)in[i] == 0);
@@ -13,25 +13,23 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 ; CHECK-LABEL: @foo
 ; CHECK: vector.body
 ; CHECK:  %[[IV:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK:  %[[v0:.+]] = getelementptr inbounds double*, double** %in, i64 %[[IV]]
-; CHECK:  %[[v1:.+]] = bitcast double** %[[v0]] to <4 x i64>*
-; CHECK:  %wide.load = load <4 x i64>, <4 x i64>* %[[v1]], align 8
+; CHECK:  %[[v0:.+]] = getelementptr inbounds ptr, ptr %in, i64 %[[IV]]
+; CHECK:  %wide.load = load <4 x i64>, ptr %[[v0]], align 8
 ; CHECK:  icmp eq <4 x i64> %wide.load, zeroinitializer
 ; CHECK:  br i1
 
-define void @foo(double** noalias nocapture readonly %in, double** noalias nocapture readnone %out, i8* noalias nocapture %res) #0 {
+define void @foo(ptr noalias nocapture readonly %in, ptr noalias nocapture readnone %out, ptr noalias nocapture %res) #0 {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds double*, double** %in, i64 %indvars.iv
-  %tmp53 = bitcast double** %arrayidx to i64*
-  %tmp54 = load i64, i64* %tmp53, align 8
+  %arrayidx = getelementptr inbounds ptr, ptr %in, i64 %indvars.iv
+  %tmp54 = load i64, ptr %arrayidx, align 8
   %cmp1 = icmp eq i64 %tmp54, 0
-  %arrayidx3 = getelementptr inbounds i8, i8* %res, i64 %indvars.iv
+  %arrayidx3 = getelementptr inbounds i8, ptr %res, i64 %indvars.iv
   %frombool = zext i1 %cmp1 to i8
-  store i8 %frombool, i8* %arrayidx3, align 1
+  store i8 %frombool, ptr %arrayidx3, align 1
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 4096
   br i1 %exitcond, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/hints-trans.ll b/llvm/test/Transforms/LoopVectorize/hints-trans.ll
index e9c059ee83e14..3c7ef44f1d340 100644
--- a/llvm/test/Transforms/LoopVectorize/hints-trans.ll
+++ b/llvm/test/Transforms/LoopVectorize/hints-trans.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Function Attrs: norecurse nounwind uwtable
-define void @foo(i32* nocapture %b) #0 {
+define void @foo(ptr nocapture %b) #0 {
 entry:
   br label %for.body
 
@@ -13,8 +13,8 @@ for.cond.cleanup:                                 ; preds = %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  store i32 1, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  store i32 1, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 16
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !0

diff --git a/llvm/test/Transforms/LoopVectorize/hoist-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-loads.ll
index 03433f27b16fe..3a05fe98a0c93 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-loads.ll
@@ -14,19 +14,19 @@ entry:
   br label %for.body
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %indvars.iv
-  %arrayidx2 = getelementptr inbounds [1024 x float], [1024 x float]* @B, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @A, i64 0, i64 %indvars.iv
+  %arrayidx2 = getelementptr inbounds [1024 x float], ptr @B, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx2, align 4
   %cmp3 = fcmp oeq float %0, 0.000000e+00
   br i1 %cmp3, label %if.end9, label %if.else
 
 if.else:
-  %1 = load float, float* %arrayidx, align 4
+  %1 = load float, ptr %arrayidx, align 4
   br label %if.end9
 
 if.end9:
   %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
-  store float %tmp.0, float* %arrayidx, align 4
+  store float %tmp.0, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 1024
@@ -42,24 +42,24 @@ for.end:
 ; CHECK: load <2 x float>
 ; CHECK-NOT: load <2 x float>
 
-define void @dont_hoist_cond_load([1024 x float]* %a) {
+define void @dont_hoist_cond_load(ptr %a) {
 entry:
   br label %for.body
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* %a, i64 0, i64 %indvars.iv
-  %arrayidx2 = getelementptr inbounds [1024 x float], [1024 x float]* @B, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr %a, i64 0, i64 %indvars.iv
+  %arrayidx2 = getelementptr inbounds [1024 x float], ptr @B, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx2, align 4
   %cmp3 = fcmp oeq float %0, 0.000000e+00
   br i1 %cmp3, label %if.end9, label %if.else
 
 if.else:
-  %1 = load float, float* %arrayidx, align 4
+  %1 = load float, ptr %arrayidx, align 4
   br label %if.end9
 
 if.end9:
   %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
-  store float %tmp.0, float* %arrayidx2, align 4
+  store float %tmp.0, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 1024

diff --git a/llvm/test/Transforms/LoopVectorize/i8-induction.ll b/llvm/test/Transforms/LoopVectorize/i8-induction.ll
index 182c657aba870..220fd64e6a829 100644
--- a/llvm/test/Transforms/LoopVectorize/i8-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/i8-induction.ll
@@ -14,8 +14,8 @@ define void @f() nounwind uwtable ssp {
 ; DEBUGLOC:         %vec.ind.next = add {{.*}}, !dbg ![[DbgLoc]]
 
 scalar.ph:
-  store i8 0, i8* inttoptr (i64 1 to i8*), align 1
-  %0 = load i8, i8* @a, align 1
+  store i8 0, ptr inttoptr (i64 1 to ptr), align 1
+  %0 = load i8, ptr @a, align 1
   br label %for.body
 
 for.body:
@@ -32,7 +32,7 @@ for.body:
   br i1 %phitmp14, label %for.body, label %for.end
 
 for.end:                                          ; preds = %for.body
-  store i8 %mul, i8* @b, align 1
+  store i8 %mul, ptr @b, align 1
   ret void
 }
 

diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
index f73c5959ee8c3..7b3f03e339f2e 100644
--- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
@@ -14,7 +14,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 ; CHECK:       icmp slt <4 x i64> %[[I]], %broadcast.splat
 ; CHECK:       br i1 {{.*}}, label %middle.block, label %vector.body
 ;
-define i32 @more_than_one_use(i32* %a, i64 %n) {
+define i32 @more_than_one_use(ptr %a, i64 %n) {
 entry:
   br label %for.body
 
@@ -24,8 +24,8 @@ for.body:
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   %tmp0 = select i1 %cond, i64 %i.next, i64 0
-  %tmp1 = getelementptr inbounds i32, i32* %a, i64 %tmp0
-  %tmp2 = load i32, i32* %tmp1, align 8
+  %tmp1 = getelementptr inbounds i32, ptr %a, i64 %tmp0
+  %tmp2 = load i32, ptr %tmp1, align 8
   %tmp3 = add i32 %r, %tmp2
   br i1 %cond, label %for.body, label %for.end
 
@@ -74,7 +74,7 @@ for.end:
 ; CHECK-NEXT:   EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
 ; CHECK-NEXT: No successor
 ; CHECK-NEXT: }
-define void @test(i32* %ptr) {
+define void @test(ptr %ptr) {
 entry:
   br label %loop
 
@@ -82,8 +82,8 @@ loop:                       ; preds = %loop, %entry
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
   %cond0 = icmp ult i64 %iv, 13
   %s = select i1 %cond0, i32 10, i32 20
-  %gep = getelementptr inbounds i32, i32* %ptr, i64 %iv
-  store i32 %s, i32* %gep
+  %gep = getelementptr inbounds i32, ptr %ptr, i64 %iv
+  store i32 %s, ptr %gep
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 14
   br i1 %exitcond, label %exit, label %loop

diff --git a/llvm/test/Transforms/LoopVectorize/if-conv-crash.ll b/llvm/test/Transforms/LoopVectorize/if-conv-crash.ll
index 01691b6a45d26..13eec6b41b875 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conv-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conv-crash.ll
@@ -39,7 +39,7 @@ if.end25:                                         ; preds = %entry
 
 ; PR15990
 ; We can have basic blocks with single entry PHI nodes.
-define void @single_entry_phi(i32* %a, i32 *%b) {
+define void @single_entry_phi(ptr %a, ptr %b) {
 entry:
   br label %for.cond1.preheader
 
@@ -55,6 +55,6 @@ for.end:
 
 for.cond.for.end5:
   %and.lcssa = phi i32 [ %malicious.phi, %for.end ]
-  store i32 %and.lcssa, i32* %a, align 4
+  store i32 %and.lcssa, ptr %a, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll
index fb993cce5fe36..7117a5dd641fa 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll
@@ -2,9 +2,9 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
- at a = global i32* null, align 8
- at b = global i32* null, align 8
- at c = global i32* null, align 8
+ at a = global ptr null, align 8
+ at b = global ptr null, align 8
+ at c = global ptr null, align 8
 
 ; Don't create an exponential IR for the edge masks needed when if-converting
 ; this code.
@@ -19,15 +19,15 @@ entry:
   br i1 %cmp88, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:
-  %0 = load i32*, i32** @b, align 8
-  %1 = load i32*, i32** @a, align 8
-  %2 = load i32*, i32** @c, align 8
+  %0 = load ptr, ptr @b, align 8
+  %1 = load ptr, ptr @a, align 8
+  %2 = load ptr, ptr @c, align 8
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %_ZL3fn3ii.exit58 ]
-  %arrayidx = getelementptr inbounds i32, i32* %0, i64 %indvars.iv
-  %3 = load i32, i32* %arrayidx, align 4  %4 = trunc i64 %indvars.iv to i32
+  %arrayidx = getelementptr inbounds i32, ptr %0, i64 %indvars.iv
+  %3 = load i32, ptr %arrayidx, align 4  %4 = trunc i64 %indvars.iv to i32
   %and.i = and i32 %4, 1
   %tobool.i.i = icmp eq i32 %and.i, 0
   br i1 %tobool.i.i, label %if.end.i, label %if.then.i
@@ -133,9 +133,9 @@ if.then.i15.i:
 
 _ZL3fn3ii.exit:
   %p1.addr.0.i16.i = phi i32 [ %or.i14.i, %if.then.i15.i ], [ %p1.addr.3.i.i, %_Z3fn2iii.exit.i ]
-  %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
-  store i32 %p1.addr.0.i16.i, i32* %arrayidx2, align 4  %arrayidx4 = getelementptr inbounds i32, i32* %0, i64 %indvars.iv
-  %10 = load i32, i32* %arrayidx4, align 4  br i1 %tobool.i.i, label %_Z3fn1ii.exit.i26, label %if.then.i.i21
+  %arrayidx2 = getelementptr inbounds i32, ptr %1, i64 %indvars.iv
+  store i32 %p1.addr.0.i16.i, ptr %arrayidx2, align 4  %arrayidx4 = getelementptr inbounds i32, ptr %0, i64 %indvars.iv
+  %10 = load i32, ptr %arrayidx4, align 4  br i1 %tobool.i.i, label %_Z3fn1ii.exit.i26, label %if.then.i.i21
 
 if.then.i.i21:
   %and.i.i18 = lshr i32 %10, 2
@@ -231,8 +231,8 @@ if.then.i15.i56:
 
 _ZL3fn3ii.exit58:
   %p1.addr.0.i16.i57 = phi i32 [ %or.i14.i55, %if.then.i15.i56 ], [ %p1.addr.3.i.i50, %_Z3fn2iii.exit.i52 ]
-  %arrayidx7 = getelementptr inbounds i32, i32* %2, i64 %indvars.iv
-  store i32 %p1.addr.0.i16.i57, i32* %arrayidx7, align 4  %indvars.iv.next = add i64 %indvars.iv, 1
+  %arrayidx7 = getelementptr inbounds i32, ptr %2, i64 %indvars.iv
+  store i32 %p1.addr.0.i16.i57, ptr %arrayidx7, align 4  %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, %p1
   br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
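Besides the instruction rewrites, this file also shows the conversion reaching global declarations and loads of pointer values: a global that used to hold an i32* now holds a ptr, and the load spells both the loaded type and the address type as ptr. A minimal sketch (the names @p and @load_p are illustrative, not taken from this test):

@p = global ptr null, align 8   ; was: @p = global i32* null, align 8

define ptr @load_p() {
  ; was: %v = load i32*, i32** @p, align 8
  %v = load ptr, ptr @p, align 8
  ret ptr %v
}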

diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-reduction.ll
index 1a82f726b5337..33ba9428378c7 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion-reduction.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK-LABEL: @reduction_func(
 ;CHECK-NOT: load <4 x i32>
 ;CHECK: ret i32
-define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+define i32 @reduction_func(ptr nocapture %A, i32 %n) nounwind uwtable readonly ssp {
 entry:
   %cmp10 = icmp sgt i32 %n, 0
   br i1 %cmp10, label %for.body, label %for.end
@@ -13,8 +13,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 30
   br i1 %cmp1, label %if.then, label %for.inc
 

diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion.ll b/llvm/test/Transforms/LoopVectorize/if-conversion.ll
index 6fa4553ccc436..37ed0c641e637 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion.ll
@@ -23,7 +23,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK: add <4 x i32>
 ;CHECK: select <4 x i1>
 ;CHECK: ret i32
-define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
+define i32 @function0(ptr nocapture %a, ptr nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
 entry:
   %cmp16 = icmp slt i32 %start, %end
   br i1 %cmp16, label %for.body.lr.ph, label %for.end
@@ -34,10 +34,10 @@ for.body.lr.ph:
 
 for.body:
   %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %if.end ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx4, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx4, align 4
   %cmp5 = icmp sgt i32 %1, %2
   br i1 %cmp5, label %if.then, label %if.end
 
@@ -48,7 +48,7 @@ if.then:
 
 if.end:
   %k.0 = phi i32 [ %add, %if.then ], [ %1, %for.body ]
-  store i32 %k.0, i32* %arrayidx, align 4
+  store i32 %k.0, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %3 = trunc i64 %indvars.iv.next to i32
   %cmp = icmp slt i32 %3, %end
@@ -75,7 +75,7 @@ for.end:
 ;CHECK: add <4 x i32>
 ;CHECK: select <4 x i1>
 ;CHECK: ret i32
-define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+define i32 @reduction_func(ptr nocapture %A, i32 %n) nounwind uwtable readonly ssp {
 entry:
   %cmp10 = icmp sgt i32 %n, 0
   br i1 %cmp10, label %for.body, label %for.end
@@ -83,8 +83,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 30
   br i1 %cmp1, label %if.then, label %for.inc
 
@@ -105,8 +105,8 @@ for.end:                                          ; preds = %for.inc, %entry
   ret i32 %sum.0.lcssa
 }
 
- at a = common global [1 x i32*] zeroinitializer, align 8
- at c = common global i32* null, align 8
+ at a = common global [1 x ptr] zeroinitializer, align 8
+ at c = common global ptr null, align 8
 
 ; We used to if-convert this loop. This is not safe because there is a trapping
 ; constant expression.
@@ -127,7 +127,7 @@ bb2:                                             ; preds = %bb4, %bb1
   br label %bb3
 
 bb3:                                             ; preds = %bb2
-  %_tmp1 = phi [1 x [1 x i32]]* [ undef, %bb2 ]
+  %_tmp1 = phi ptr [ undef, %bb2 ]
   br label %bb4
 
 bb4:                                             ; preds = %bb3
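The %_tmp1 phi above illustrates that the rewrite is purely type-level even for deeply nested pointees: [1 x [1 x i32]]* and i32* both become the single ptr type, so incoming values such as undef carry over unchanged. A minimal sketch of a pointer-typed phi after the conversion (function and value names are illustrative):

define ptr @pick(i1 %c, ptr %a, ptr %b) {
entry:
  br i1 %c, label %then, label %join

then:
  br label %join

join:
  ; was: %r = phi [1 x [1 x i32]]* [ %a, %entry ], [ %b, %then ]
  %r = phi ptr [ %a, %entry ], [ %b, %then ]
  ret ptr %r
}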

diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll b/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll
index 3687257e4db6e..3496ca343023e 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll
@@ -4,10 +4,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Test no-predication of instructions that are provably safe, e.g. dividing by
 ; a non-zero constant.
-define void @test(i32* nocapture %asd, i32* nocapture %aud,
-                  i32* nocapture %asr, i32* nocapture %aur,
-                  i32* nocapture %asd0, i32* nocapture %aud0,
-                  i32* nocapture %asr0, i32* nocapture %aur0
+define void @test(ptr nocapture %asd, ptr nocapture %aud,
+                  ptr nocapture %asr, ptr nocapture %aur,
+                  ptr nocapture %asd0, ptr nocapture %aud0,
+                  ptr nocapture %asr0, ptr nocapture %aur0
 ) {
 entry:
   br label %for.body
@@ -28,26 +28,26 @@ for.cond.cleanup:                                 ; preds = %if.end
 
 for.body:                                         ; preds = %if.end, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
-  %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
-  %iud = getelementptr inbounds i32, i32* %aud, i64 %indvars.iv
-  %isr = getelementptr inbounds i32, i32* %asr, i64 %indvars.iv
-  %iur = getelementptr inbounds i32, i32* %aur, i64 %indvars.iv
-  %lsd = load i32, i32* %isd, align 4
-  %lud = load i32, i32* %iud, align 4
-  %lsr = load i32, i32* %isr, align 4
-  %lur = load i32, i32* %iur, align 4
+  %isd = getelementptr inbounds i32, ptr %asd, i64 %indvars.iv
+  %iud = getelementptr inbounds i32, ptr %aud, i64 %indvars.iv
+  %isr = getelementptr inbounds i32, ptr %asr, i64 %indvars.iv
+  %iur = getelementptr inbounds i32, ptr %aur, i64 %indvars.iv
+  %lsd = load i32, ptr %isd, align 4
+  %lud = load i32, ptr %iud, align 4
+  %lsr = load i32, ptr %isr, align 4
+  %lur = load i32, ptr %iur, align 4
   %psd = add nsw i32 %lsd, 23
   %pud = add nsw i32 %lud, 24
   %psr = add nsw i32 %lsr, 25
   %pur = add nsw i32 %lur, 26
-  %isd0 = getelementptr inbounds i32, i32* %asd0, i64 %indvars.iv
-  %iud0 = getelementptr inbounds i32, i32* %aud0, i64 %indvars.iv
-  %isr0 = getelementptr inbounds i32, i32* %asr0, i64 %indvars.iv
-  %iur0 = getelementptr inbounds i32, i32* %aur0, i64 %indvars.iv
-  %lsd0 = load i32, i32* %isd0, align 4
-  %lud0 = load i32, i32* %iud0, align 4
-  %lsr0 = load i32, i32* %isr0, align 4
-  %lur0 = load i32, i32* %iur0, align 4
+  %isd0 = getelementptr inbounds i32, ptr %asd0, i64 %indvars.iv
+  %iud0 = getelementptr inbounds i32, ptr %aud0, i64 %indvars.iv
+  %isr0 = getelementptr inbounds i32, ptr %asr0, i64 %indvars.iv
+  %iur0 = getelementptr inbounds i32, ptr %aur0, i64 %indvars.iv
+  %lsd0 = load i32, ptr %isd0, align 4
+  %lud0 = load i32, ptr %iud0, align 4
+  %lsr0 = load i32, ptr %isr0, align 4
+  %lur0 = load i32, ptr %iur0, align 4
   %psd0 = add nsw i32 %lsd, 27
   %pud0 = add nsw i32 %lud, 28
   %psr0 = add nsw i32 %lsr, 29
@@ -75,14 +75,14 @@ if.end:                                           ; preds = %if.then, %for.body
   %yud0.0 = phi i32 [ %rud0, %if.then ], [ %pud0, %for.body ]
   %ysr0.0 = phi i32 [ %rsr0, %if.then ], [ %psr0, %for.body ]
   %yur0.0 = phi i32 [ %rur0, %if.then ], [ %pur0, %for.body ]
-  store i32 %ysd.0, i32* %isd, align 4
-  store i32 %yud.0, i32* %iud, align 4
-  store i32 %ysr.0, i32* %isr, align 4
-  store i32 %yur.0, i32* %iur, align 4
-  store i32 %ysd0.0, i32* %isd0, align 4
-  store i32 %yud0.0, i32* %iud0, align 4
-  store i32 %ysr0.0, i32* %isr0, align 4
-  store i32 %yur0.0, i32* %iur0, align 4
+  store i32 %ysd.0, ptr %isd, align 4
+  store i32 %yud.0, ptr %iud, align 4
+  store i32 %ysr.0, ptr %isr, align 4
+  store i32 %yur.0, ptr %iur, align 4
+  store i32 %ysd0.0, ptr %isd0, align 4
+  store i32 %yud0.0, ptr %iud0, align 4
+  store i32 %ysr0.0, ptr %isr0, align 4
+  store i32 %yur0.0, ptr %iur0, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 128
   br i1 %exitcond, label %for.cond.cleanup, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
index bad9441e064e5..b82c5bb473c54 100644
--- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
@@ -6,7 +6,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 ;   Check vectorization of reduction code which has an fadd instruction after
 ;   an fcmp instruction which compares an array element and 0.
 ;
-; float fcmp_0_fadd_select1(float * restrict x, const int N) {
+; float fcmp_0_fadd_select1(ptr restrict x, const int N) {
 ;   float sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > (float)0.)
@@ -18,7 +18,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
 ; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
-define float @fcmp_0_fadd_select1(float* noalias %x, i32 %N) nounwind readonly {
+define float @fcmp_0_fadd_select1(ptr noalias %x, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -30,8 +30,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %header, %for.body
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
   %add = fadd fast float %0, %sum.1
   %sum.2 = select i1 %cmp.2, float %add, float %sum.1
@@ -48,7 +48,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;   Check vectorization of reduction code which has an fadd instruction after
 ;   an fcmp instruction which compares an array element and 0.
 ;
-; double fcmp_0_fadd_select2(double * restrict x, const int N) {
+; double fcmp_0_fadd_select2(ptr restrict x, const int N) {
 ;   double sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > 0.)
@@ -60,7 +60,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
 ; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
-define double @fcmp_0_fadd_select2(double* noalias %x, i32 %N) nounwind readonly {
+define double @fcmp_0_fadd_select2(ptr noalias %x, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -72,8 +72,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %header, %for.body
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 4
   %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
   %add = fadd fast double %0, %sum.1
   %sum.2 = select i1 %cmp.2, double %add, double %sum.1
@@ -91,7 +91,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;   an fcmp instruction which compares an array element and a floating-point
 ;   value.
 ;
-; float fcmp_val_fadd_select1(float * restrict x, float y, const int N) {
+; float fcmp_val_fadd_select1(ptr restrict x, float y, const int N) {
 ;   float sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > y)
@@ -103,7 +103,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %broadcast.splat
 ; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
-define float @fcmp_val_fadd_select1(float* noalias %x, float %y, i32 %N) nounwind readonly {
+define float @fcmp_val_fadd_select1(ptr noalias %x, float %y, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -115,8 +115,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %header, %for.body
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp.2 = fcmp fast ogt float %0, %y
   %add = fadd fast float %0, %sum.1
   %sum.2 = select i1 %cmp.2, float %add, float %sum.1
@@ -134,7 +134,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;   an fcmp instruction which compares an array element and a floating-point
 ;   value.
 ;
-; double fcmp_val_fadd_select2(double * restrict x, double y, const int N) {
+; double fcmp_val_fadd_select2(ptr restrict x, double y, const int N) {
 ;   double sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > y)
@@ -146,7 +146,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], %broadcast.splat
 ; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
-define double @fcmp_val_fadd_select2(double* noalias %x, double %y, i32 %N) nounwind readonly {
+define double @fcmp_val_fadd_select2(ptr noalias %x, double %y, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -158,8 +158,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %header, %for.body
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 4
   %cmp.2 = fcmp fast ogt double %0, %y
   %add = fadd fast double %0, %sum.1
   %sum.2 = select i1 %cmp.2, double %add, double %sum.1
@@ -177,7 +177,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;   an fcmp instruction which compares an array element and another array
 ;   element.
 ;
-; float fcmp_array_elm_fadd_select1(float * restrict x, float * restrict y,
+; float fcmp_array_elm_fadd_select1(ptr restrict x, ptr restrict y,
 ;                                   const int N) {
 ;   float sum = 0.
 ;   for (int i = 0; i < N; ++i)
@@ -190,7 +190,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: %[[V2:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %[[V1:.*]]
 ; CHECK: %[[V4:.*]] = fadd fast <4 x float> %[[V0]], %[[V3:.*]]
 ; CHECK: select <4 x i1> %[[V2]], <4 x float> %[[V4]], <4 x float> %[[V3]]
-define float @fcmp_array_elm_fadd_select1(float* noalias %x, float* noalias %y, i32 %N) nounwind readonly {
+define float @fcmp_array_elm_fadd_select1(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -202,10 +202,10 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.header
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx.1 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float, float* %arrayidx.1, align 4
-  %arrayidx.2 = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %1 = load float, float* %arrayidx.2, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx.2, align 4
   %cmp.2 = fcmp fast ogt float %0, %1
   %add = fadd fast float %0, %sum.1
   %sum.2 = select i1 %cmp.2, float %add, float %sum.1
@@ -223,7 +223,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;   an fcmp instruction which compares an array element and another array
 ;   element.
 ;
-; double fcmp_array_elm_fadd_select2(double * restrict x, double * restrict y,
+; double fcmp_array_elm_fadd_select2(ptr restrict x, ptr restrict y,
 ;                                    const int N) {
 ;   double sum = 0.
 ;   for (int i = 0; i < N; ++i)
@@ -236,7 +236,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: %[[V2:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], %[[V1:.*]]
 ; CHECK: %[[V4:.*]] = fadd fast <4 x double> %[[V0]], %[[V3:.*]]
 ; CHECK: select <4 x i1> %[[V2]], <4 x double> %[[V4]], <4 x double> %[[V3]]
-define double @fcmp_array_elm_fadd_select2(double* noalias %x, double* noalias %y, i32 %N) nounwind readonly {
+define double @fcmp_array_elm_fadd_select2(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -248,10 +248,10 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.header
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx.1 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  %0 = load double, double* %arrayidx.1, align 4
-  %arrayidx.2 = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %1 = load double, double* %arrayidx.2, align 4
+  %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %1 = load double, ptr %arrayidx.2, align 4
   %cmp.2 = fcmp fast ogt double %0, %1
   %add = fadd fast double %0, %sum.1
   %sum.2 = select i1 %cmp.2, double %add, double %sum.1
@@ -268,7 +268,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;   Check vectorization of reduction code which has an fsub instruction after
 ;   an fcmp instruction which compares an array element and 0.
 ;
-; float fcmp_0_fsub_select1(float * restrict x, const int N) {
+; float fcmp_0_fsub_select1(ptr restrict x, const int N) {
 ;   float sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > (float)0.)
@@ -280,7 +280,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
 ; CHECK: %[[V3:.*]] = fsub fast <4 x float> %[[V2:.*]], %[[V0]]
 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
-define float @fcmp_0_fsub_select1(float* noalias %x, i32 %N) nounwind readonly {
+define float @fcmp_0_fsub_select1(ptr noalias %x, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -292,8 +292,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.header
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
   %sub = fsub fast float %sum.1, %0
   %sum.2 = select i1 %cmp.2, float %sub, float %sum.1
@@ -308,7 +308,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; Float pattern:
 ;   Check that it is not vectorized if the fp-instruction has no fast-math property.
-; float fcmp_0_fsub_select1_novectorize(float * restrict x, const int N) {
+; float fcmp_0_fsub_select1_novectorize(ptr restrict x, const int N) {
 ;   float sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > (float)0.)
@@ -318,7 +318,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; CHECK-LABEL: @fcmp_0_fsub_select1_novectorize(
 ; CHECK-NOT: <4 x float>
-define float @fcmp_0_fsub_select1_novectorize(float* noalias %x, i32 %N) nounwind readonly {
+define float @fcmp_0_fsub_select1_novectorize(ptr noalias %x, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -330,8 +330,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.header
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp.2 = fcmp ogt float %0, 0.000000e+00
   %sub = fsub float %sum.1, %0
   %sum.2 = select i1 %cmp.2, float %sub, float %sum.1
@@ -348,7 +348,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;   Check vectorization of reduction code which has an fsub instruction after
 ;   an fcmp instruction which compares an array element and 0.
 ;
-; double fcmp_0_fsub_select2(double * restrict x, const int N) {
+; double fcmp_0_fsub_select2(ptr restrict x, const int N) {
 ;   double sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > 0.)
@@ -360,7 +360,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
 ; CHECK: %[[V3:.*]] = fsub fast <4 x double> %[[V2:.*]], %[[V0]]
 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
-define double @fcmp_0_fsub_select2(double* noalias %x, i32 %N) nounwind readonly {
+define double @fcmp_0_fsub_select2(ptr noalias %x, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -372,8 +372,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.header
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 4
   %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
   %sub = fsub fast double %sum.1, %0
   %sum.2 = select i1 %cmp.2, double %sub, double %sum.1
@@ -389,7 +389,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; Double pattern:
 ; Check that it is not vectorized if the fp-instruction has no fast-math property.
 ;
-; double fcmp_0_fsub_select2_notvectorize(double * restrict x, const int N) {
+; double fcmp_0_fsub_select2_notvectorize(ptr restrict x, const int N) {
 ;   double sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > 0.)
@@ -399,7 +399,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; CHECK-LABEL: @fcmp_0_fsub_select2_notvectorize(
 ; CHECK-NOT: <4 x double>
-define double @fcmp_0_fsub_select2_notvectorize(double* noalias %x, i32 %N) nounwind readonly {
+define double @fcmp_0_fsub_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -411,8 +411,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.header
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 4
   %cmp.2 = fcmp ogt double %0, 0.000000e+00
   %sub = fsub double %sum.1, %0
   %sum.2 = select i1 %cmp.2, double %sub, double %sum.1
@@ -429,7 +429,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;   Check vectorization of reduction code which has an fmul instruction after
 ;   an fcmp instruction which compares an array element and 0.
 ;
-; float fcmp_0_fmult_select1(float * restrict x, const int N) {
+; float fcmp_0_fmult_select1(ptr restrict x, const int N) {
 ;   float sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > (float)0.)
@@ -441,7 +441,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
 ; CHECK: %[[V3:.*]] = fmul fast <4 x float> %[[V2:.*]], %[[V0]]
 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
-define float @fcmp_0_fmult_select1(float* noalias %x, i32 %N) nounwind readonly {
+define float @fcmp_0_fmult_select1(ptr noalias %x, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -453,8 +453,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.header
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
   %mult = fmul fast float %sum.1, %0
   %sum.2 = select i1 %cmp.2, float %mult, float %sum.1
@@ -470,7 +470,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; Float pattern:
 ;   Check that it is not vectorized if the fp-instruction has no fast-math property.
 ;
-; float fcmp_0_fmult_select1_notvectorize(float * restrict x, const int N) {
+; float fcmp_0_fmult_select1_notvectorize(ptr restrict x, const int N) {
 ;   float sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > (float)0.)
@@ -480,7 +480,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; CHECK-LABEL: @fcmp_0_fmult_select1_notvectorize(
 ; CHECK-NOT: <4 x float>
-define float @fcmp_0_fmult_select1_notvectorize(float* noalias %x, i32 %N) nounwind readonly {
+define float @fcmp_0_fmult_select1_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -492,8 +492,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.header
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp.2 = fcmp ogt float %0, 0.000000e+00
   %mult = fmul float %sum.1, %0
   %sum.2 = select i1 %cmp.2, float %mult, float %sum.1
@@ -510,7 +510,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;   Check vectorization of reduction code which has an fmul instruction after
 ;   an fcmp instruction which compares an array element and 0.
 ;
-; double fcmp_0_fmult_select2(double * restrict x, const int N) {
+; double fcmp_0_fmult_select2(ptr restrict x, const int N) {
 ;   double sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > 0.)
@@ -522,7 +522,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
 ; CHECK: %[[V3:.*]] = fmul fast <4 x double> %[[V2:.*]], %[[V0]]
 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
-define double @fcmp_0_fmult_select2(double* noalias %x, i32 %N) nounwind readonly {
+define double @fcmp_0_fmult_select2(ptr noalias %x, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -534,8 +534,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.header
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 4
   %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
   %mult = fmul fast double %sum.1, %0
   %sum.2 = select i1 %cmp.2, double %mult, double %sum.1
@@ -551,7 +551,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; Double pattern:
 ;   Check that it is not vectorized if the fp-instruction has no fast-math property.
 ;
-; double fcmp_0_fmult_select2_notvectorize(double * restrict x, const int N) {
+; double fcmp_0_fmult_select2_notvectorize(ptr restrict x, const int N) {
 ;   double sum = 0.
 ;   for (int i = 0; i < N; ++i)
 ;     if (x[i] > 0.)
@@ -561,7 +561,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; CHECK-LABEL: @fcmp_0_fmult_select2_notvectorize(
 ; CHECK-NOT: <4 x double>
-define double @fcmp_0_fmult_select2_notvectorize(double* noalias %x, i32 %N) nounwind readonly {
+define double @fcmp_0_fmult_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
 entry:
   %cmp.1 = icmp sgt i32 %N, 0
   br i1 %cmp.1, label %for.header, label %for.end
@@ -573,8 +573,8 @@ for.header:                                       ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.header
   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 4
   %cmp.2 = fcmp ogt double %0, 0.000000e+00
   %mult = fmul double %sum.1, %0
   %sum.2 = select i1 %cmp.2, double %mult, double %sum.1
@@ -591,7 +591,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;   Check vectorisation of reduction code with a pair of selects to different
 ;   fadd patterns.
 ;
-; float fcmp_multi(float *a, int n) {
+; float fcmp_multi(ptr a, int n) {
 ;   float sum=0.0;
 ;   for (int i=0;i<n;i++) {
 ;     if (a[i]>1.0)
@@ -616,7 +616,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C22]], <4 x float> %[[M1]], <4 x float> %[[M2]]
 ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C1]], <4 x float> %[[V0]], <4 x float> %[[S1]]
 ; CHECK: fadd fast <4 x float> %[[S2]],
-define float @fcmp_multi(float* nocapture readonly %a, i32 %n) nounwind readonly {
+define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly {
 entry:
   %cmp10 = icmp sgt i32 %n, 0
   br i1 %cmp10, label %for.body.preheader, label %for.end
@@ -628,8 +628,8 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.inc, %for.body.preheader
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
   %sum.011 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp1 = fcmp ogt float %0, 1.000000e+00
   br i1 %cmp1, label %for.inc, label %if.else
 
@@ -661,7 +661,7 @@ for.end:                                          ; preds = %for.inc, %entry
 ;   Check vectorisation of reduction code with a pair of selects to different
 ;   instructions { fadd, fsub } but equivalent (change in constant).
 ;
-; float fcmp_multi(float *a, int n) {
+; float fcmp_multi(ptr a, int n) {
 ;   float sum=0.0;
 ;   for (int i=0;i<n;i++) {
 ;     if (a[i]>1.0)
@@ -683,7 +683,7 @@ for.end:                                          ; preds = %for.inc, %entry
 ; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer
 ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C12]], <4 x float> %[[SUB]], <4 x float> %[[ADD]]
 ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C22]], {{.*}} <4 x float> %[[S1]]
-define float @fcmp_fadd_fsub(float* nocapture readonly %a, i32 %n) nounwind readonly {
+define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonly {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body.preheader, label %for.end
@@ -695,8 +695,8 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.inc, %for.body.preheader
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
   %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp1 = fcmp ogt float %0, 1.000000e+00
   br i1 %cmp1, label %if.then, label %if.else
 
@@ -727,7 +727,7 @@ for.end:                                          ; preds = %for.inc, %entry
 ;   Check lack of vectorisation of reduction code with a pair of non-compatible
 ;   instructions { fadd, fmul }.
 ;
-; float fcmp_multi(float *a, int n) {
+; float fcmp_multi(ptr a, int n) {
 ;   float sum=0.0;
 ;   for (int i=0;i<n;i++) {
 ;     if (a[i]>1.0)
@@ -740,7 +740,7 @@ for.end:                                          ; preds = %for.inc, %entry
 
 ; CHECK-LABEL: @fcmp_fadd_fmul(
 ; CHECK-NOT: <4 x float>
-define float @fcmp_fadd_fmul(float* nocapture readonly %a, i32 %n) nounwind readonly {
+define float @fcmp_fadd_fmul(ptr nocapture readonly %a, i32 %n) nounwind readonly {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body.preheader, label %for.end
@@ -752,8 +752,8 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.inc, %for.body.preheader
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
   %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp1 = fcmp ogt float %0, 1.000000e+00
   br i1 %cmp1, label %if.then, label %if.else
 
@@ -793,7 +793,7 @@ for.end:                                          ; preds = %for.inc, %entry
 ;     return sum;
 ; }
 
-define float @fcmp_store_back(float* nocapture %a, i32 %LEN) nounwind readonly {
+define float @fcmp_store_back(ptr nocapture %a, i32 %LEN) nounwind readonly {
 ; CHECK-LABEL: @fcmp_store_back(
 ; CHECK-NOT:     <4 x float>
 ;
@@ -808,10 +808,10 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.body.preheader
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
   %sum.08 = phi float [ 0.000000e+00, %for.body.preheader ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %add = fadd fast float %0, %sum.08
-  store float %add, float* %arrayidx, align 4
+  store float %add, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.end, label %for.body

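The hunks above all apply the same mechanical rewrite: the pointee type disappears from pointer-typed operands, while getelementptr, load and store keep their explicit value types. A minimal before/after sketch of the pattern (the function and value names here are illustrative, not taken from any of these tests):

; typed-pointer form (before):
;   %p = getelementptr inbounds float, float* %base, i64 %i
;   %v = load float, float* %p, align 4
;   store float %v, float* %p, align 4
; opaque-pointer form (after):
define void @copy_elem(ptr %base, i64 %i) {
  %p = getelementptr inbounds float, ptr %base, i64 %i
  %v = load float, ptr %p, align 4
  store float %v, ptr %p, align 4
  ret void
}
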
diff --git a/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll b/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll
index f3357a3fd1a95..bf8bfb424f1e6 100644
--- a/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll
+++ b/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll
@@ -45,21 +45,21 @@ thread-pre-split.preheader:                       ; preds = %9
 
 .thread-pre-split.loopexit_crit_edge:             ; preds = %19
   %scevgep.sum = xor i64 %umax, -1
-  %scevgep45 = getelementptr i8, i8* %d.020, i64 %scevgep.sum
+  %scevgep45 = getelementptr i8, ptr %d.020, i64 %scevgep.sum
   br label %thread-pre-split.loopexit
 
 thread-pre-split.loopexit:                        ; preds = %11, %.thread-pre-split.loopexit_crit_edge
-  %d.1.lcssa = phi i8* [ %scevgep45, %.thread-pre-split.loopexit_crit_edge ], [ %d.020, %11 ]
+  %d.1.lcssa = phi ptr [ %scevgep45, %.thread-pre-split.loopexit_crit_edge ], [ %d.020, %11 ]
   br i1 false, label %thread-pre-split._crit_edge, label %.lr.ph21
 
 .lr.ph21:                                         ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader
-  %d.020 = phi i8* [ undef, %26 ], [ %d.1.lcssa, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ]
+  %d.020 = phi ptr [ undef, %26 ], [ %d.1.lcssa, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ]
   %10 = phi i64 [ %28, %26 ], [ undef, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ]
   br i1 undef, label %11, label %22
 
 ; <label>:11                                      ; preds = %.lr.ph21
-  %12 = getelementptr inbounds [0 x i8], [0 x i8]* @PL_utf8skip, i64 0, i64 undef
-  %13 = load i8, i8* %12, align 1
+  %12 = getelementptr inbounds [0 x i8], ptr @PL_utf8skip, i64 0, i64 undef
+  %13 = load i8, ptr %12, align 1
   %14 = zext i8 %13 to i64
   %15 = icmp ugt i64 %14, %10
   %. = select i1 %15, i64 %10, i64 %14
@@ -91,7 +91,7 @@ thread-pre-split.loopexit:                        ; preds = %11, %.thread-pre-sp
   br label %26
 
 ; <label>:26                                      ; preds = %25, %24, %23, %22
-  %27 = load i64, i64* %len, align 8
+  %27 = load i64, ptr %len, align 8
   %28 = add i64 %27, -1
   br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21
 

diff --git a/llvm/test/Transforms/LoopVectorize/increment.ll b/llvm/test/Transforms/LoopVectorize/increment.ll
index 5308b70139fbb..b5af65a8c99c8 100644
--- a/llvm/test/Transforms/LoopVectorize/increment.ll
+++ b/llvm/test/Transforms/LoopVectorize/increment.ll
@@ -19,11 +19,11 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp {
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
   %4 = trunc i64 %indvars.iv to i32
   %5 = add nsw i32 %3, %4
-  store i32 %5, i32* %2, align 4
+  store i32 %5, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -41,20 +41,20 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp {
 ;CHECK-LABEL: @histogram(
 ;CHECK-NOT: <4 x i32>
 ;CHECK: ret i32
-define i32 @histogram(i32* nocapture noalias %A, i32* nocapture noalias %B, i32 %n) nounwind uwtable ssp {
+define i32 @histogram(ptr nocapture noalias %A, ptr nocapture noalias %B, i32 %n) nounwind uwtable ssp {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %idxprom1 = sext i32 %0 to i64
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %idxprom1
+  %1 = load i32, ptr %arrayidx2, align 4
   %inc = add nsw i32 %1, 1
-  store i32 %inc, i32* %arrayidx2, align 4
+  store i32 %inc, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff --git a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll
index 4af5ba852c947..692bedcd2ed85 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll
@@ -3,7 +3,7 @@
 
 
 ; Test case where %gep has multiple uses of %iv.
-define void @multiple_iv_uses_in_same_instruction([100 x [100 x i32]]* %ptr) {
+define void @multiple_iv_uses_in_same_instruction(ptr %ptr) {
 ; CHECK-LABEL: @multiple_iv_uses_in_same_instruction(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -16,10 +16,10 @@ define void @multiple_iv_uses_in_same_instruction([100 x [100 x i32]]* %ptr) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP0]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* [[PTR:%.*]], i64 0, i64 [[TMP3]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* [[PTR]], i64 0, i64 [[TMP4]], i64 [[TMP4]]
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR:%.*]], i64 0, i64 [[TMP3]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR]], i64 0, i64 [[TMP4]], i64 [[TMP4]]
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[TMP5]], align 4
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -31,9 +31,9 @@ define void @multiple_iv_uses_in_same_instruction([100 x [100 x i32]]* %ptr) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* [[PTR]], i64 0, i64 [[IV]], i64 [[IV]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR]], i64 0, i64 [[IV]], i64 [[IV]]
 ; CHECK-NEXT:    [[T:%.*]] = trunc i64 [[IV]] to i32
-; CHECK-NEXT:    store i32 [[T]], i32* [[GEP]], align 4
+; CHECK-NEXT:    store i32 [[T]], ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -45,9 +45,9 @@ entry:
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %gep = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %ptr, i64 0, i64 %iv, i64 %iv
+  %gep = getelementptr inbounds [100 x [100 x i32]], ptr %ptr, i64 0, i64 %iv, i64 %iv
   %t = trunc i64 %iv to i32
-  store i32 %t, i32* %gep, align 4
+  store i32 %t, ptr %gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 100
   br i1 %exitcond, label %exit, label %loop

diff --git a/llvm/test/Transforms/LoopVectorize/induction-unroll-novec.ll b/llvm/test/Transforms/LoopVectorize/induction-unroll-novec.ll
index d88284afdce60..d40544225754e 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-unroll-novec.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-unroll-novec.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 
 ; Test for PR54427.
-define void @test_nonconst_start_and_step(i32* %dst, i32 %start, i32 %step, i64 %N) {
+define void @test_nonconst_start_and_step(ptr %dst, i32 %start, i32 %step, i64 %N) {
 ; CHECK-LABEL: @test_nonconst_start_and_step(
 ; CHECK:         [[NEG_STEP:%.+]] = sub i32 0, %step
 ; CHECK:       vector.body:
@@ -20,10 +20,10 @@ define void @test_nonconst_start_and_step(i32* %dst, i32 %start, i32 %step, i64
 ; CHECK-NEXT:    [[INDUCTION4:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw i32 [[INDUCTION]], %step
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw i32 [[INDUCTION2]], %step
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[INDUCTION3]]
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION4]]
-; CHECK-NEXT:    store i32 [[TMP6]], i32* [[TMP8]], align 2
-; CHECK-NEXT:    store i32 [[TMP7]], i32* [[TMP9]], align 2
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDUCTION3]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDUCTION4]]
+; CHECK-NEXT:    store i32 [[TMP6]], ptr [[TMP8]], align 2
+; CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP9]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]]
 ; CHECK-NEXT:    br i1 [[TMP10]], label %middle.block, label %vector.body
@@ -35,8 +35,8 @@ loop:
   %primary.iv = phi i64 [ 0, %entry ], [ %primary.iv.next, %loop ]
   %iv.down = phi i32 [ %start, %entry ], [ %iv.down.next, %loop ]
   %iv.down.next = sub nsw i32 %iv.down, %step
-  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 %primary.iv
-  store i32 %iv.down.next, i32* %gep.dst, align 2
+  %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %primary.iv
+  store i32 %iv.down.next, ptr %gep.dst, align 2
   %primary.iv.next = add nuw nsw i64 %primary.iv, 1
   %exitcond = icmp eq i64 %primary.iv.next, %N
   br i1 %exitcond, label %exit, label %loop

diff --git a/llvm/test/Transforms/LoopVectorize/infiniteloop.ll b/llvm/test/Transforms/LoopVectorize/infiniteloop.ll
index fe69b44d957ba..f2a900e642a3a 100644
--- a/llvm/test/Transforms/LoopVectorize/infiniteloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/infiniteloop.ll
@@ -17,18 +17,18 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 ; CHECK-LABEL: @fn1(
 define void @fn1()  {
 entry:
-  store i64 0, i64* @a, align 8
+  store i64 0, ptr @a, align 8
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %inc1 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  store volatile i32 0, i32* @x, align 4
+  store volatile i32 0, ptr @x, align 4
   %inc = add nsw i64 %inc1, 1
   %cmp = icmp sgt i64 %inc1, -2
   br i1 %cmp, label %for.body, label %for.end
 
 for.end:                                          ; preds = %for.body
   %inc.lcssa = phi i64 [ %inc, %for.body ]
-  store i64 %inc.lcssa, i64* @a, align 8
+  store i64 %inc.lcssa, ptr @a, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/LoopVectorize/int_sideeffect.ll b/llvm/test/Transforms/LoopVectorize/int_sideeffect.ll
index 67cef7c5f5b8c..7edf0d26ee82c 100644
--- a/llvm/test/Transforms/LoopVectorize/int_sideeffect.ll
+++ b/llvm/test/Transforms/LoopVectorize/int_sideeffect.ll
@@ -6,15 +6,15 @@ declare void @llvm.sideeffect()
 
 ; CHECK-LABEL: store_ones
 ; CHECK: store <4 x float>
-define void @store_ones(float* %p, i64 %n) nounwind {
+define void @store_ones(ptr %p, i64 %n) nounwind {
 bb7.lr.ph:
   br label %bb7
 
 bb7:
   %i.02 = phi i64 [ 0, %bb7.lr.ph ], [ %tmp13, %bb7 ]
   call void @llvm.sideeffect()
-  %tmp10 = getelementptr inbounds float, float* %p, i64 %i.02
-  store float 1.0, float* %tmp10, align 4
+  %tmp10 = getelementptr inbounds float, ptr %p, i64 %i.02
+  store float 1.0, ptr %tmp10, align 4
   %tmp13 = add i64 %i.02, 1
   %tmp6 = icmp ult i64 %tmp13, %n
   br i1 %tmp6, label %bb7, label %bb14

diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-1.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-1.ll
index c6bf6da21ef64..d42a15a60f042 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-1.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-1.ll
@@ -36,13 +36,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ;}
 
 ; CHECK: vector.body:
-; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
+; CHECK: %wide.vec = load <8 x i32>, ptr {{.*}}, align 4
 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 
 %class.Complex = type { float, float }
 
-define void @_Z4testP7ComplexS0_mm(%class.Complex* noalias nocapture %out, %class.Complex* noalias nocapture readonly %in, i64 %out_start, i64 %size) local_unnamed_addr {
+define void @_Z4testP7ComplexS0_mm(ptr noalias nocapture %out, ptr noalias nocapture readonly %in, i64 %out_start, i64 %size) local_unnamed_addr {
 entry:
   %cmp9 = icmp eq i64 %size, 0
   br i1 %cmp9, label %for.cond.cleanup, label %for.body.preheader
@@ -59,19 +59,16 @@ for.cond.cleanup:
 for.body:
   %out_offset.010 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %add = add i64 %out_offset.010, %out_start
-  %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %in, i64 %add
-  %0 = bitcast %class.Complex* %arrayidx to i32*
-  %1 = load i32, i32* %0, align 4
-  %imaginary_.i.i = getelementptr inbounds %class.Complex, %class.Complex* %in, i64 %add, i32 1
-  %2 = bitcast float* %imaginary_.i.i to i32*
-  %3 = load i32, i32* %2, align 4
-  %arrayidx1 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %add
-  %4 = bitcast %class.Complex* %arrayidx1 to i64*
-  %t0.sroa.4.0.insert.ext = zext i32 %3 to i64
+  %arrayidx = getelementptr inbounds %class.Complex, ptr %in, i64 %add
+  %0 = load i32, ptr %arrayidx, align 4
+  %imaginary_.i.i = getelementptr inbounds %class.Complex, ptr %in, i64 %add, i32 1
+  %1 = load i32, ptr %imaginary_.i.i, align 4
+  %arrayidx1 = getelementptr inbounds %class.Complex, ptr %out, i64 %add
+  %t0.sroa.4.0.insert.ext = zext i32 %1 to i64
   %t0.sroa.4.0.insert.shift = shl nuw i64 %t0.sroa.4.0.insert.ext, 32
-  %t0.sroa.0.0.insert.ext = zext i32 %1 to i64
+  %t0.sroa.0.0.insert.ext = zext i32 %0 to i64
   %t0.sroa.0.0.insert.insert = or i64 %t0.sroa.4.0.insert.shift, %t0.sroa.0.0.insert.ext
-  store i64 %t0.sroa.0.0.insert.insert, i64* %4, align 4
+  store i64 %t0.sroa.0.0.insert.insert, ptr %arrayidx1, align 4
   %inc = add nuw i64 %out_offset.010, 1
   %exitcond = icmp eq i64 %inc, %size
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body

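Besides rewriting pointer types, the interleaved-access hunks above also delete instructions: with opaque pointers a pointer-to-pointer bitcast is a no-op, so the bitcasts that fed the i32 loads and stores are dropped and the remaining unnamed values (%0, %1, ...) are renumbered. A minimal sketch of that simplification, with illustrative names:

; before:
;   %c = bitcast float* %arrayidx to i32*
;   %v = load i32, i32* %c, align 4
; after: the same bits are loaded through the ptr directly
define i32 @load_bits_as_i32(ptr %arrayidx) {
  %v = load i32, ptr %arrayidx, align 4
  ret i32 %v
}
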
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-2.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-2.ll
index eb66743888d51..aebbcd526fc43 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-2.ll
@@ -16,7 +16,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; will not be invalidated by the wrapping checks.
 
 ; #include <stdlib.h>
-; void test(float * __restrict__ out, float * __restrict__ in, size_t size)
+; void test(ptr __restrict__ out, ptr __restrict__ in, size_t size)
 ; {
 ;    for (size_t out_offset = 0; out_offset < size; ++out_offset)
 ;      {
@@ -26,10 +26,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; }
 
 ; CHECK: vector.body:
-; CHECK-NOT: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
+; CHECK-NOT: %wide.vec = load <8 x i32>, ptr {{.*}}, align 4
 ; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 
-define void @_Z4testPfS_m(float* noalias nocapture %out, float* noalias nocapture readonly %in, i64 %size) local_unnamed_addr {
+define void @_Z4testPfS_m(ptr noalias nocapture %out, ptr noalias nocapture readonly %in, i64 %size) local_unnamed_addr {
 entry:
   %cmp7 = icmp eq i64 %size, 0
   br i1 %cmp7, label %for.cond.cleanup, label %for.body.preheader
@@ -46,12 +46,10 @@ for.cond.cleanup:
 for.body:
   %out_offset.08 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %mul = shl i64 %out_offset.08, 1
-  %arrayidx = getelementptr inbounds float, float* %in, i64 %mul
-  %0 = bitcast float* %arrayidx to i32*
-  %1 = load i32, i32* %0, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %out, i64 %out_offset.08
-  %2 = bitcast float* %arrayidx1 to i32*
-  store i32 %1, i32* %2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %in, i64 %mul
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %out, i64 %out_offset.08
+  store i32 %0, ptr %arrayidx1, align 4
   %inc = add nuw i64 %out_offset.08, 1
   %exitcond = icmp eq i64 %inc, %size
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll
index c30cfb40aa875..53201e1a47870 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll
@@ -18,15 +18,15 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; void func(unsigned * __restrict a, unsigned * __restrict b, unsigned char x, unsigned char y) {
 ;  int i = 0;
 ;  for (unsigned char index = x; i < y; index +=2, ++i)
-;    b[i] = a[index] * 2;
+;    b[i] = a[index] * 2;
 ;
 ; }
 
 ; CHECK: vector.body:
-; CHECK-NOT: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
+; CHECK-NOT: %wide.vec = load <8 x i32>, ptr {{.*}}, align 4
 ; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 
-define void @_Z4funcPjS_hh(i32* noalias nocapture readonly %a, i32* noalias nocapture %b, i8 zeroext %x, i8 zeroext %y) local_unnamed_addr {
+define void @_Z4funcPjS_hh(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i8 zeroext %x, i8 zeroext %y) local_unnamed_addr {
 entry:
   %cmp9 = icmp eq i8 %y, 0
   br i1 %cmp9, label %for.cond.cleanup, label %for.body.preheader
@@ -45,11 +45,11 @@ for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %index.011 = phi i8 [ %add, %for.body ], [ %x, %for.body.preheader ]
   %idxprom = zext i8 %index.011 to i64
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 4
   %mul = shl i32 %0, 1
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  store i32 %mul, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  store i32 %mul, ptr %arrayidx2, align 4
   %add = add i8 %index.011, 2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count

diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-alias.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-alias.ll
index 4454cf5dbbce1..7dcae9ea09d3d 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-alias.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-alias.ll
@@ -10,41 +10,39 @@ target triple = "arm64-apple-ios5.0.0"
 %struct.Vec4r = type { double, double, double, double }
 %struct.Vec2r = type { double, double }
 
-define void @foobar(%struct.Vec4r* nocapture readonly %p, i32 %i)
+define void @foobar(ptr nocapture readonly %p, i32 %i)
 {
 entry:
   %cp = alloca [20 x %struct.Vec2r], align 8
-  %0 = bitcast [20 x %struct.Vec2r]* %cp to i8*
   br label %for.body
 
 for.cond.cleanup:                                 ; preds = %for.body
-  %arraydecay = getelementptr inbounds [20 x %struct.Vec2r], [20 x %struct.Vec2r]* %cp, i64 0, i64 0
-  call void @g(%struct.Vec2r* nonnull %arraydecay) #4
+  call void @g(ptr nonnull %cp) #4
   ret void
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %x = getelementptr inbounds %struct.Vec4r, %struct.Vec4r* %p, i64 %indvars.iv, i32 0
-  %1 = load double, double* %x, align 8, !tbaa !3
-  %mul = fmul double %1, 2.000000e+00
-  %x4 = getelementptr inbounds [20 x %struct.Vec2r], [20 x %struct.Vec2r]* %cp, i64 0, i64 %indvars.iv, i32 0
+  %x = getelementptr inbounds %struct.Vec4r, ptr %p, i64 %indvars.iv, i32 0
+  %0 = load double, ptr %x, align 8, !tbaa !3
+  %mul = fmul double %0, 2.000000e+00
+  %x4 = getelementptr inbounds [20 x %struct.Vec2r], ptr %cp, i64 0, i64 %indvars.iv, i32 0
 
 ; The new store should alias any double rather than one of the fields of Vec2r.
 ; CHECK: store <4 x double> {{.*}} !tbaa ![[STORE_TBAA:[0-9]+]]
 ; CHECK-DAG: ![[DOUBLE_TBAA:[0-9]+]] = !{!"double", !{{[0-9+]}}, i64 0}
 ; CHECK-DAG: ![[STORE_TBAA]] = !{![[DOUBLE_TBAA]], ![[DOUBLE_TBAA]], i64 0}
-  store double %mul, double* %x4, align 8, !tbaa !8
-  %y = getelementptr inbounds %struct.Vec4r, %struct.Vec4r* %p, i64 %indvars.iv, i32 1
-  %2 = load double, double* %y, align 8, !tbaa !10
-  %mul7 = fmul double %2, 3.000000e+00
-  %y10 = getelementptr inbounds [20 x %struct.Vec2r], [20 x %struct.Vec2r]* %cp, i64 0, i64 %indvars.iv, i32 1
-  store double %mul7, double* %y10, align 8, !tbaa !11
+  store double %mul, ptr %x4, align 8, !tbaa !8
+  %y = getelementptr inbounds %struct.Vec4r, ptr %p, i64 %indvars.iv, i32 1
+  %1 = load double, ptr %y, align 8, !tbaa !10
+  %mul7 = fmul double %1, 3.000000e+00
+  %y10 = getelementptr inbounds [20 x %struct.Vec2r], ptr %cp, i64 0, i64 %indvars.iv, i32 1
+  store double %mul7, ptr %y10, align 8, !tbaa !11
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 4
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-declare void @g(%struct.Vec2r*)
+declare void @g(ptr)
 
 !llvm.module.flags = !{!0, !1}
 !llvm.ident = !{!2}

diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-masked-group.ll
index e6a77b7eb8fd4..7d335a8d56f6f 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-masked-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-masked-group.ll
@@ -36,12 +36,12 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 
 ; STRIDED_MASKED: LV: Checking a loop in 'masked_strided1'
 ; STRIDED_MASKED: LV: Analyzing interleaved accesses...
-; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 %{{.*}}, i8* %{{.*}}, align 1
-; STRIDED_MASKED-NEXT: LV: Inserted:  store i8  %{{.*}}, i8* %{{.*}}, align 1
-; STRIDED_MASKED-NEXT:     into the interleave group with  store i8 %{{.*}}, i8* %{{.*}}, align 1
-; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:   %{{.*}} = load i8, i8* %{{.*}}, align 1
-; STRIDED_MASKED-NEXT: LV: Inserted:  %{{.*}} = load i8, i8* %{{.*}}, align 1
-; STRIDED_MASKED-NEXT:     into the interleave group with   %{{.*}} = load i8, i8* %{{.*}}, align 1
+; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 %{{.*}}, ptr %{{.*}}, align 1
+; STRIDED_MASKED-NEXT: LV: Inserted:  store i8  %{{.*}}, ptr %{{.*}}, align 1
+; STRIDED_MASKED-NEXT:     into the interleave group with  store i8 %{{.*}}, ptr %{{.*}}, align 1
+; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:   %{{.*}} = load i8, ptr %{{.*}}, align 1
+; STRIDED_MASKED-NEXT: LV: Inserted:  %{{.*}} = load i8, ptr %{{.*}}, align 1
+; STRIDED_MASKED-NEXT:     into the interleave group with   %{{.*}} = load i8, ptr %{{.*}}, align 1
 
 ; Scenario 2: Check the case where it is illegal to create a masked interleave-
 ; group because the first access is predicated, and the second isn't.
@@ -65,14 +65,14 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 
 ; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided2'
 ; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
-; STRIDED_UNMASKED-NEXT: LV: Creating an interleave group with:  store i8 1, i8* %{{.*}}, align 1
+; STRIDED_UNMASKED-NEXT: LV: Creating an interleave group with:  store i8 1, ptr %{{.*}}, align 1
 ; STRIDED_UNMASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
 ; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
 
 ; STRIDED_MASKED: LV: Checking a loop in 'masked_strided2'
 ; STRIDED_MASKED: LV: Analyzing interleaved accesses...
-; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 2, i8* %{{.*}}, align 1
-; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 1, i8* %{{.*}}, align 1
+; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 2, ptr %{{.*}}, align 1
+; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 1, ptr %{{.*}}, align 1
 ; STRIDED_MASKED-NOT: LV: Invalidate candidate interleaved store group due to gaps.
 
 
@@ -103,8 +103,8 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 
 ; STRIDED_MASKED: LV: Checking a loop in 'masked_strided3'
 ; STRIDED_MASKED: LV: Analyzing interleaved accesses...
-; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 2, i8* %{{.*}}, align 1
-; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 1, i8* %{{.*}}, align 1
+; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 2, ptr %{{.*}}, align 1
+; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 1, ptr %{{.*}}, align 1
 ; STRIDED_MASKED-NOT: LV: Invalidate candidate interleaved store group due to gaps.
 
 
@@ -113,7 +113,7 @@ source_filename = "test.c"
 target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 target triple = "i386-unknown-linux-gnu"
 
-define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
+define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
 entry:
   %conv = zext i8 %guard to i32
   br label %for.body
@@ -125,18 +125,18 @@ for.body:
 
 if.then:
   %mul = shl nuw nsw i32 %ix.024, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
   %add = or i32 %mul, 1
-  %arrayidx4 = getelementptr inbounds i8, i8* %p, i32 %add
-  %1 = load i8, i8* %arrayidx4, align 1
+  %arrayidx4 = getelementptr inbounds i8, ptr %p, i32 %add
+  %1 = load i8, ptr %arrayidx4, align 1
   %cmp.i = icmp slt i8 %0, %1
   %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0
-  %arrayidx6 = getelementptr inbounds i8, i8* %q, i32 %mul
-  store i8 %spec.select.i, i8* %arrayidx6, align 1
+  %arrayidx6 = getelementptr inbounds i8, ptr %q, i32 %mul
+  store i8 %spec.select.i, ptr %arrayidx6, align 1
   %sub = sub i8 0, %spec.select.i
-  %arrayidx11 = getelementptr inbounds i8, i8* %q, i32 %add
-  store i8 %sub, i8* %arrayidx11, align 1
+  %arrayidx11 = getelementptr inbounds i8, ptr %q, i32 %add
+  store i8 %sub, ptr %arrayidx11, align 1
   br label %for.inc
 
 for.inc:
@@ -149,7 +149,7 @@ for.end:
 }
 
 
-define dso_local void @masked_strided2(i8* noalias nocapture readnone %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
+define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
 entry:
   %conv = zext i8 %guard to i32
   br label %for.body
@@ -157,15 +157,15 @@ entry:
 for.body:
   %ix.012 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
   %mul = shl nuw nsw i32 %ix.012, 1
-  %arrayidx = getelementptr inbounds i8, i8* %q, i32 %mul
-  store i8 1, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %q, i32 %mul
+  store i8 1, ptr %arrayidx, align 1
   %cmp1 = icmp ugt i32 %ix.012, %conv
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:
   %add = or i32 %mul, 1
-  %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %add
-  store i8 2, i8* %arrayidx3, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %q, i32 %add
+  store i8 2, ptr %arrayidx3, align 1
   br label %for.inc
 
 for.inc:
@@ -178,7 +178,7 @@ for.end:
 }
 
 
-define dso_local void @masked_strided3(i8* noalias nocapture readnone %p, i8* noalias nocapture %q, i8 zeroext %guard1, i8 zeroext %guard2) local_unnamed_addr #0 {
+define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr noalias nocapture %q, i8 zeroext %guard1, i8 zeroext %guard2) local_unnamed_addr #0 {
 entry:
   %conv = zext i8 %guard1 to i32
   %conv3 = zext i8 %guard2 to i32
@@ -191,8 +191,8 @@ for.body:
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:
-  %arrayidx = getelementptr inbounds i8, i8* %q, i32 %mul
-  store i8 1, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %q, i32 %mul
+  store i8 1, ptr %arrayidx, align 1
   br label %if.end
 
 if.end:
@@ -201,8 +201,8 @@ if.end:
 
 if.then6:
   %add = or i32 %mul, 1
-  %arrayidx7 = getelementptr inbounds i8, i8* %q, i32 %add
-  store i8 2, i8* %arrayidx7, align 1
+  %arrayidx7 = getelementptr inbounds i8, ptr %q, i32 %add
+  store i8 2, ptr %arrayidx7, align 1
   br label %for.inc
 
 for.inc:

diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
index 652fc3bba993b..d307dd11dfafb 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
@@ -14,7 +14,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 ;
 ;
 
-define void @interleaved_with_cond_store_0(%pair *%p, i64 %x, i64 %n) {
+define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) {
 ; CHECK-LABEL: @interleaved_with_cond_store_0(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
@@ -30,26 +30,25 @@ define void @interleaved_with_cond_store_0(%pair *%p, i64 %x, i64 %n) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[PAIR:%.*]], %pair* [[P:%.*]], i64 [[INDEX]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <4 x i64>*
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <4 x i64>, <4 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP2_1:%.*]] = getelementptr inbounds [[PAIR:%.*]], %pair* [[P:%.*]], i64 [[INDEX]], i32 1
+; CHECK-NEXT:    [[TMP2_1:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT:    store i64 [[TMP6]], i64* [[TMP2_1]], align 8
+; CHECK-NEXT:    store i64 [[TMP6]], ptr [[TMP2_1]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.if1:
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[TMP8]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP8]], i32 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2
-; CHECK-NEXT:    store i64 [[TMP10]], i64* [[TMP9]], align 8
+; CHECK-NEXT:    store i64 [[TMP10]], ptr [[TMP9]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.continue2:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -62,12 +61,12 @@ define void @interleaved_with_cond_store_0(%pair *%p, i64 %x, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[IF_MERGE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[P_1:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[I]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = load i64, i64* [[P_1]], align 8
+; CHECK-NEXT:    [[P_1:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = load i64, ptr [[P_1]], align 8
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[TMP12]], [[X]]
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[IF_THEN:%.*]], label [[IF_MERGE]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store i64 [[TMP12]], i64* [[P_1]], align 8
+; CHECK-NEXT:    store i64 [[TMP12]], ptr [[P_1]], align 8
 ; CHECK-NEXT:    br label [[IF_MERGE]]
 ; CHECK:       if.merge:
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -81,13 +80,13 @@ entry:
 
 for.body:
   %i  = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
-  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
-  %0 = load i64, i64* %p.1, align 8
+  %p.1 = getelementptr inbounds %pair, ptr %p, i64 %i, i32 1
+  %0 = load i64, ptr %p.1, align 8
   %1 = icmp eq i64 %0, %x
   br i1 %1, label %if.then, label %if.merge
 
 if.then:
-  store i64 %0, i64* %p.1, align 8
+  store i64 %0, ptr %p.1, align 8
   br label %if.merge
 
 if.merge:
@@ -110,7 +109,7 @@ for.end:
 ;
 ;
 
-define void @interleaved_with_cond_store_1(%pair *%p, i64 %x, i64 %n) {
+define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) {
 ; CHECK-LABEL: @interleaved_with_cond_store_1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
@@ -127,35 +126,33 @@ define void @interleaved_with_cond_store_1(%pair *%p, i64 %x, i64 %n) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], %pair* [[P:%.*]], i64 [[INDEX]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[INDEX]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[TMP2]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[TMP4]] to <4 x i64>*
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <4 x i64>, <4 x i64>* [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 1
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [[PAIR:%.*]], %pair* [[P:%.*]], i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT:    store i64 [[TMP9]], i64* [[PTR0]], align 8
+; CHECK-NEXT:    store i64 [[TMP9]], ptr [[PTR0]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP7]], i64 1
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.if1:
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2
-; CHECK-NEXT:    store i64 [[TMP12]], i64* [[TMP11]], align 8
+; CHECK-NEXT:    store i64 [[TMP12]], ptr [[TMP11]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.continue2:
-; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i64* [[TMP3]] to <4 x i64>*
-; CHECK-NEXT:    [[WIDE_VEC3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP13]], align 8
+; CHECK-NEXT:    [[WIDE_VEC3:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 0
-; CHECK-NEXT:    store i64 [[TMP14]], i64* [[TMP4]], align 8
+; CHECK-NEXT:    store i64 [[TMP14]], ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 2
-; CHECK-NEXT:    store i64 [[TMP15]], i64* [[TMP5]], align 8
+; CHECK-NEXT:    store i64 [[TMP15]], ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -166,17 +163,17 @@ define void @interleaved_with_cond_store_1(%pair *%p, i64 %x, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[IF_MERGE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[P_0:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[I]], i32 0
-; CHECK-NEXT:    [[P_1:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[I]], i32 1
-; CHECK-NEXT:    [[TMP17:%.*]] = load i64, i64* [[P_1]], align 8
+; CHECK-NEXT:    [[P_0:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 0
+; CHECK-NEXT:    [[P_1:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 1
+; CHECK-NEXT:    [[TMP17:%.*]] = load i64, ptr [[P_1]], align 8
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[TMP17]], [[X]]
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[IF_THEN:%.*]], label [[IF_MERGE]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store i64 [[TMP17]], i64* [[P_0]], align 8
+; CHECK-NEXT:    store i64 [[TMP17]], ptr [[P_0]], align 8
 ; CHECK-NEXT:    br label [[IF_MERGE]]
 ; CHECK:       if.merge:
-; CHECK-NEXT:    [[TMP19:%.*]] = load i64, i64* [[P_0]], align 8
-; CHECK-NEXT:    store i64 [[TMP19]], i64* [[P_1]], align 8
+; CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[P_0]], align 8
+; CHECK-NEXT:    store i64 [[TMP19]], ptr [[P_1]], align 8
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -188,19 +185,19 @@ entry:
 
 for.body:
   %i  = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
-  %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
-  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
-  %0 = load i64, i64* %p.1, align 8
+  %p.0 = getelementptr inbounds %pair, ptr %p, i64 %i, i32 0
+  %p.1 = getelementptr inbounds %pair, ptr %p, i64 %i, i32 1
+  %0 = load i64, ptr %p.1, align 8
   %1 = icmp eq i64 %0, %x
   br i1 %1, label %if.then, label %if.merge
 
 if.then:
-  store i64 %0, i64* %p.0, align 8
+  store i64 %0, ptr %p.0, align 8
   br label %if.merge
 
 if.merge:
-  %2 = load i64, i64* %p.0, align 8
-  store i64 %2, i64 *%p.1, align 8
+  %2 = load i64, ptr %p.0, align 8
+  store i64 %2, ptr %p.1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -219,7 +216,7 @@ for.end:
 ;
 ;
 
-define void @interleaved_with_cond_store_2(%pair *%p, i64 %x, i64 %n) {
+define void @interleaved_with_cond_store_2(ptr %p, i64 %x, i64 %n) {
 ; CHECK-LABEL: @interleaved_with_cond_store_2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
@@ -236,29 +233,28 @@ define void @interleaved_with_cond_store_2(%pair *%p, i64 %x, i64 %n) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], %pair* [[P:%.*]], i64 [[INDEX]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[INDEX]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <4 x i64>*
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <4 x i64>, <4 x i64>* [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    store i64 [[X]], i64* [[TMP3]], align 8
-; CHECK-NEXT:    store i64 [[X]], i64* [[TMP4]], align 8
+; CHECK-NEXT:    store i64 [[X]], ptr [[TMP3]], align 8
+; CHECK-NEXT:    store i64 [[X]], ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[INDEX]], i32 1
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT:    store i64 [[TMP9]], i64* [[PTR1]], align 8
+; CHECK-NEXT:    store i64 [[TMP9]], ptr [[PTR1]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP7]], i64 1
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.if1:
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2
-; CHECK-NEXT:    store i64 [[TMP12]], i64* [[TMP11]], align 8
+; CHECK-NEXT:    store i64 [[TMP12]], ptr [[TMP11]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.continue2:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -271,14 +267,14 @@ define void @interleaved_with_cond_store_2(%pair *%p, i64 %x, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[IF_MERGE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[P_0:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[I]], i32 0
-; CHECK-NEXT:    [[P_1:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[I]], i32 1
-; CHECK-NEXT:    [[TMP14:%.*]] = load i64, i64* [[P_1]], align 8
-; CHECK-NEXT:    store i64 [[X]], i64* [[P_0]], align 8
+; CHECK-NEXT:    [[P_0:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 0
+; CHECK-NEXT:    [[P_1:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 1
+; CHECK-NEXT:    [[TMP14:%.*]] = load i64, ptr [[P_1]], align 8
+; CHECK-NEXT:    store i64 [[X]], ptr [[P_0]], align 8
 ; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[TMP14]], [[X]]
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[IF_THEN:%.*]], label [[IF_MERGE]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store i64 [[TMP14]], i64* [[P_1]], align 8
+; CHECK-NEXT:    store i64 [[TMP14]], ptr [[P_1]], align 8
 ; CHECK-NEXT:    br label [[IF_MERGE]]
 ; CHECK:       if.merge:
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -292,15 +288,15 @@ entry:
 
 for.body:
   %i  = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
-  %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
-  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
-  %0 = load i64, i64* %p.1, align 8
-  store i64 %x, i64* %p.0, align 8
+  %p.0 = getelementptr inbounds %pair, ptr %p, i64 %i, i32 0
+  %p.1 = getelementptr inbounds %pair, ptr %p, i64 %i, i32 1
+  %0 = load i64, ptr %p.1, align 8
+  store i64 %x, ptr %p.0, align 8
   %1 = icmp eq i64 %0, %x
   br i1 %1, label %if.then, label %if.merge
 
 if.then:
-  store i64 %0, i64* %p.1, align 8
+  store i64 %0, ptr %p.1, align 8
   br label %if.merge
 
 if.merge:

diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll
index c43020eb0edf3..081dcd831a1be 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll
@@ -4,7 +4,7 @@
 ; by the loop's exit condition, which is not vectorized, and is therefore
 ; considered uniform while also forming an interleave group.
 
-%0 = type { i32 ()*, i32 }
+%0 = type { ptr, i32 }
 
 @0 = internal unnamed_addr constant [59 x %0] [%0 zeroinitializer,
 %0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
@@ -24,24 +24,24 @@
 %0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
 %0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
 %0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
-%0 {i32 ()* null, i32 258}, %0 zeroinitializer, %0 zeroinitializer,
+%0 {ptr null, i32 258}, %0 zeroinitializer, %0 zeroinitializer,
 %0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
 %0 zeroinitializer], align 8
 
 define dso_local void @test_dead_load(i32 %arg) {
 ; CHECK-LABEL: @test_dead_load(
 ; CHECK: vector.body:
-; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* %3, align 8
+; CHECK: %wide.vec = load <16 x i32>, ptr %3, align 8
 ; CHECK: %strided.vec = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
 bb1:
   br label %bb2
 
 bb2:
-  %tmp = phi %0* [ %tmp6, %bb2 ], [ getelementptr inbounds ([59 x %0], [59 x %0]* @0, i64 0, i64 0), %bb1 ]
-  %tmp3 = getelementptr inbounds %0, %0* %tmp, i64 0, i32 1
-  %tmp4 = load i32, i32* %tmp3, align 8
+  %tmp = phi ptr [ %tmp6, %bb2 ], [ @0, %bb1 ]
+  %tmp3 = getelementptr inbounds %0, ptr %tmp, i64 0, i32 1
+  %tmp4 = load i32, ptr %tmp3, align 8
   %tmp5 = icmp eq i32 %tmp4, 258
-  %tmp6 = getelementptr inbounds %0, %0* %tmp, i64 1
+  %tmp6 = getelementptr inbounds %0, ptr %tmp, i64 1
   br i1 %tmp5, label %bb65, label %bb2
 
 bb65:

diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-acess-with-remarks.ll b/llvm/test/Transforms/LoopVectorize/interleaved-acess-with-remarks.ll
index bc71933315b03..0c8f5da86af2a 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-acess-with-remarks.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-acess-with-remarks.ll
@@ -14,28 +14,26 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 %union.anon = type { i64 }
 %MyStruct = type { i32, %"struct.std::atomic", %union.anon }
 
-define void @atomicLoadsBothWriteAndReadMem(%MyStruct *%a, %MyStruct *%b, %MyStruct *%lim) {
+define void @atomicLoadsBothWriteAndReadMem(ptr %a, ptr %b, ptr %lim) {
 entry:
   br label %loop
 
 loop:
-  %0 = phi %MyStruct* [ %a, %entry ], [ %ainc, %loop ]
-  %1 = phi %MyStruct* [ %b, %entry ], [ %binc, %loop ]
-  %2 = getelementptr %MyStruct, %MyStruct* %1, i64 0, i32 0
-  %3 = load i32, i32* %2, align 8
-  %4 = getelementptr inbounds %MyStruct, %MyStruct* %0, i64 0, i32 0
-  store i32 %3, i32* %4, align 8
-  %5 = getelementptr inbounds %MyStruct, %MyStruct* %1, i64 0, i32 1, i32 0, i32 0
-  %6 = load atomic i32, i32* %5 monotonic, align 4
-  %7 = getelementptr inbounds %MyStruct, %MyStruct* %0, i64 0, i32 1, i32 0, i32 0
-  store atomic i32 %6, i32* %7 monotonic, align 4
-  %8 = getelementptr inbounds %MyStruct, %MyStruct* %1, i64 0, i32 2, i32 0
-  %9 = getelementptr inbounds %MyStruct, %MyStruct* %0, i64 0, i32 2, i32 0
-  %10 = load i64, i64* %8, align 8
-  store i64 %10, i64* %9, align 8
-  %binc = getelementptr inbounds %MyStruct, %MyStruct* %1, i64 1
-  %ainc = getelementptr inbounds %MyStruct, %MyStruct* %0, i64 1
-  %cond = icmp eq %MyStruct* %binc, %lim
+  %0 = phi ptr [ %a, %entry ], [ %ainc, %loop ]
+  %1 = phi ptr [ %b, %entry ], [ %binc, %loop ]
+  %2 = load i32, ptr %1, align 8
+  store i32 %2, ptr %0, align 8
+  %3 = getelementptr inbounds %MyStruct, ptr %1, i64 0, i32 1, i32 0, i32 0
+  %4 = load atomic i32, ptr %3 monotonic, align 4
+  %5 = getelementptr inbounds %MyStruct, ptr %0, i64 0, i32 1, i32 0, i32 0
+  store atomic i32 %4, ptr %5 monotonic, align 4
+  %6 = getelementptr inbounds %MyStruct, ptr %1, i64 0, i32 2, i32 0
+  %7 = getelementptr inbounds %MyStruct, ptr %0, i64 0, i32 2, i32 0
+  %8 = load i64, ptr %6, align 8
+  store i64 %8, ptr %7, align 8
+  %binc = getelementptr inbounds %MyStruct, ptr %1, i64 1
+  %ainc = getelementptr inbounds %MyStruct, ptr %0, i64 1
+  %cond = icmp eq ptr %binc, %lim
   br i1 %cond, label %exit, label %loop
 
 exit:

diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index bce75eee5bdae..b2ba7cfbfa3a4 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -5,18 +5,18 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK-LABEL: @sqrt_f32(
 ;CHECK: llvm.sqrt.v4f32
 ;CHECK: ret void
-define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @sqrt_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.sqrt.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -31,18 +31,18 @@ declare float @llvm.sqrt.f32(float) nounwind readnone
 ;CHECK-LABEL: @sqrt_f64(
 ;CHECK: llvm.sqrt.v4f64
 ;CHECK: ret void
-define void @sqrt_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @sqrt_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.sqrt.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -57,18 +57,18 @@ declare double @llvm.sqrt.f64(double) nounwind readnone
 ;CHECK-LABEL: @sin_f32(
 ;CHECK: llvm.sin.v4f32
 ;CHECK: ret void
-define void @sin_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @sin_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.sin.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -83,18 +83,18 @@ declare float @llvm.sin.f32(float) nounwind readnone
 ;CHECK-LABEL: @sin_f64(
 ;CHECK: llvm.sin.v4f64
 ;CHECK: ret void
-define void @sin_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @sin_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.sin.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -109,18 +109,18 @@ declare double @llvm.sin.f64(double) nounwind readnone
 ;CHECK-LABEL: @cos_f32(
 ;CHECK: llvm.cos.v4f32
 ;CHECK: ret void
-define void @cos_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @cos_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.cos.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -135,18 +135,18 @@ declare float @llvm.cos.f32(float) nounwind readnone
 ;CHECK-LABEL: @cos_f64(
 ;CHECK: llvm.cos.v4f64
 ;CHECK: ret void
-define void @cos_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @cos_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.cos.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -161,18 +161,18 @@ declare double @llvm.cos.f64(double) nounwind readnone
 ;CHECK-LABEL: @exp_f32(
 ;CHECK: llvm.exp.v4f32
 ;CHECK: ret void
-define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @exp_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -187,18 +187,18 @@ declare float @llvm.exp.f32(float) nounwind readnone
 ;CHECK-LABEL: @exp_f64(
 ;CHECK: llvm.exp.v4f64
 ;CHECK: ret void
-define void @exp_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @exp_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.exp.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -213,18 +213,18 @@ declare double @llvm.exp.f64(double) nounwind readnone
 ;CHECK-LABEL: @exp2_f32(
 ;CHECK: llvm.exp2.v4f32
 ;CHECK: ret void
-define void @exp2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @exp2_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.exp2.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -239,18 +239,18 @@ declare float @llvm.exp2.f32(float) nounwind readnone
 ;CHECK-LABEL: @exp2_f64(
 ;CHECK: llvm.exp2.v4f64
 ;CHECK: ret void
-define void @exp2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @exp2_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.exp2.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -265,18 +265,18 @@ declare double @llvm.exp2.f64(double) nounwind readnone
 ;CHECK-LABEL: @log_f32(
 ;CHECK: llvm.log.v4f32
 ;CHECK: ret void
-define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @log_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.log.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -291,18 +291,18 @@ declare float @llvm.log.f32(float) nounwind readnone
 ;CHECK-LABEL: @log_f64(
 ;CHECK: llvm.log.v4f64
 ;CHECK: ret void
-define void @log_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @log_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.log.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -317,18 +317,18 @@ declare double @llvm.log.f64(double) nounwind readnone
 ;CHECK-LABEL: @log10_f32(
 ;CHECK: llvm.log10.v4f32
 ;CHECK: ret void
-define void @log10_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @log10_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.log10.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -343,18 +343,18 @@ declare float @llvm.log10.f32(float) nounwind readnone
 ;CHECK-LABEL: @log10_f64(
 ;CHECK: llvm.log10.v4f64
 ;CHECK: ret void
-define void @log10_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @log10_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.log10.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -369,18 +369,18 @@ declare double @llvm.log10.f64(double) nounwind readnone
 ;CHECK-LABEL: @log2_f32(
 ;CHECK: llvm.log2.v4f32
 ;CHECK: ret void
-define void @log2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @log2_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.log2.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -395,18 +395,18 @@ declare float @llvm.log2.f32(float) nounwind readnone
 ;CHECK-LABEL: @log2_f64(
 ;CHECK: llvm.log2.v4f64
 ;CHECK: ret void
-define void @log2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @log2_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.log2.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -421,18 +421,18 @@ declare double @llvm.log2.f64(double) nounwind readnone
 ;CHECK-LABEL: @fabs_f32(
 ;CHECK: llvm.fabs.v4f32
 ;CHECK: ret void
-define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @fabs_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.fabs.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -444,18 +444,18 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.fabs.f32(float) nounwind readnone
 
-define void @fabs_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @fabs_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.fabs(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -470,20 +470,20 @@ declare double @llvm.fabs(double) nounwind readnone
 ;CHECK-LABEL: @copysign_f32(
 ;CHECK: llvm.copysign.v4f32
 ;CHECK: ret void
-define void @copysign_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+define void @copysign_f32(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %1 = load float, float* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %z, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx1, align 4
   %call = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -495,20 +495,20 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.copysign.f32(float, float) nounwind readnone
 
-define void @copysign_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
+define void @copysign_f64(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
-  %arrayidx1 = getelementptr inbounds double, double* %z, i64 %indvars.iv
-  %1 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds double, ptr %z, i64 %indvars.iv
+  %1 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.copysign(double %0, double %1) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -523,18 +523,18 @@ declare double @llvm.copysign(double, double) nounwind readnone
 ;CHECK-LABEL: @floor_f32(
 ;CHECK: llvm.floor.v4f32
 ;CHECK: ret void
-define void @floor_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @floor_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.floor.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -549,18 +549,18 @@ declare float @llvm.floor.f32(float) nounwind readnone
 ;CHECK-LABEL: @floor_f64(
 ;CHECK: llvm.floor.v4f64
 ;CHECK: ret void
-define void @floor_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @floor_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.floor.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -575,18 +575,18 @@ declare double @llvm.floor.f64(double) nounwind readnone
 ;CHECK-LABEL: @ceil_f32(
 ;CHECK: llvm.ceil.v4f32
 ;CHECK: ret void
-define void @ceil_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @ceil_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.ceil.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -601,18 +601,18 @@ declare float @llvm.ceil.f32(float) nounwind readnone
 ;CHECK-LABEL: @ceil_f64(
 ;CHECK: llvm.ceil.v4f64
 ;CHECK: ret void
-define void @ceil_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @ceil_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.ceil.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -627,18 +627,18 @@ declare double @llvm.ceil.f64(double) nounwind readnone
 ;CHECK-LABEL: @trunc_f32(
 ;CHECK: llvm.trunc.v4f32
 ;CHECK: ret void
-define void @trunc_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @trunc_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.trunc.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -653,18 +653,18 @@ declare float @llvm.trunc.f32(float) nounwind readnone
 ;CHECK-LABEL: @trunc_f64(
 ;CHECK: llvm.trunc.v4f64
 ;CHECK: ret void
-define void @trunc_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @trunc_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.trunc.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -679,18 +679,18 @@ declare double @llvm.trunc.f64(double) nounwind readnone
 ;CHECK-LABEL: @rint_f32(
 ;CHECK: llvm.rint.v4f32
 ;CHECK: ret void
-define void @rint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @rint_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.rint.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -705,18 +705,18 @@ declare float @llvm.rint.f32(float) nounwind readnone
 ;CHECK-LABEL: @rint_f64(
 ;CHECK: llvm.rint.v4f64
 ;CHECK: ret void
-define void @rint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @rint_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.rint.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -731,18 +731,18 @@ declare double @llvm.rint.f64(double) nounwind readnone
 ;CHECK-LABEL: @nearbyint_f32(
 ;CHECK: llvm.nearbyint.v4f32
 ;CHECK: ret void
-define void @nearbyint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @nearbyint_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.nearbyint.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -757,18 +757,18 @@ declare float @llvm.nearbyint.f32(float) nounwind readnone
 ;CHECK-LABEL: @nearbyint_f64(
 ;CHECK: llvm.nearbyint.v4f64
 ;CHECK: ret void
-define void @nearbyint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @nearbyint_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.nearbyint.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -783,18 +783,18 @@ declare double @llvm.nearbyint.f64(double) nounwind readnone
 ;CHECK-LABEL: @round_f32(
 ;CHECK: llvm.round.v4f32
 ;CHECK: ret void
-define void @round_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @round_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.round.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -809,18 +809,18 @@ declare float @llvm.round.f32(float) nounwind readnone
 ;CHECK-LABEL: @round_f64(
 ;CHECK: llvm.round.v4f64
 ;CHECK: ret void
-define void @round_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @round_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.round.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -835,18 +835,18 @@ declare double @llvm.round.f64(double) nounwind readnone
 ;CHECK-LABEL: @roundeven_f32(
 ;CHECK: llvm.roundeven.v4f32
 ;CHECK: ret void
-define void @roundeven_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @roundeven_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.roundeven.f32(float %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -861,18 +861,18 @@ declare float @llvm.roundeven.f32(float) nounwind readnone
 ;CHECK-LABEL: @roundeven_f64(
 ;CHECK: llvm.roundeven.v4f64
 ;CHECK: ret void
-define void @roundeven_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @roundeven_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp6 = icmp sgt i32 %n, 0
   br i1 %cmp6, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.roundeven.f64(double %0) nounwind readnone
-  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -887,22 +887,22 @@ declare double @llvm.roundeven.f64(double) nounwind readnone
 ;CHECK-LABEL: @fma_f32(
 ;CHECK: llvm.fma.v4f32
 ;CHECK: ret void
-define void @fma_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
+define void @fma_f32(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z, ptr noalias %w) nounwind uwtable {
 entry:
   %cmp12 = icmp sgt i32 %n, 0
   br i1 %cmp12, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %w, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
-  %arrayidx4 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %2 = load float, float* %arrayidx4, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %w, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %z, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx4, align 4
   %3 = tail call float @llvm.fma.f32(float %0, float %2, float %1)
-  %arrayidx6 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %3, float* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %3, ptr %arrayidx6, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -917,22 +917,22 @@ declare float @llvm.fma.f32(float, float, float) nounwind readnone
 ;CHECK-LABEL: @fma_f64(
 ;CHECK: llvm.fma.v4f64
 ;CHECK: ret void
-define void @fma_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
+define void @fma_f64(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z, ptr noalias %w) nounwind uwtable {
 entry:
   %cmp12 = icmp sgt i32 %n, 0
   br i1 %cmp12, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
-  %arrayidx2 = getelementptr inbounds double, double* %w, i64 %indvars.iv
-  %1 = load double, double* %arrayidx2, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %z, i64 %indvars.iv
-  %2 = load double, double* %arrayidx4, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %w, i64 %indvars.iv
+  %1 = load double, ptr %arrayidx2, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %z, i64 %indvars.iv
+  %2 = load double, ptr %arrayidx4, align 8
   %3 = tail call double @llvm.fma.f64(double %0, double %2, double %1)
-  %arrayidx6 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %3, double* %arrayidx6, align 8
+  %arrayidx6 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %3, ptr %arrayidx6, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -947,22 +947,22 @@ declare double @llvm.fma.f64(double, double, double) nounwind readnone
 ;CHECK-LABEL: @fmuladd_f32(
 ;CHECK: llvm.fmuladd.v4f32
 ;CHECK: ret void
-define void @fmuladd_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
+define void @fmuladd_f32(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z, ptr noalias %w) nounwind uwtable {
 entry:
   %cmp12 = icmp sgt i32 %n, 0
   br i1 %cmp12, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %w, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
-  %arrayidx4 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %2 = load float, float* %arrayidx4, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %w, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %z, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx4, align 4
   %3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1)
-  %arrayidx6 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %3, float* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %3, ptr %arrayidx6, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -977,22 +977,22 @@ declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
 ;CHECK-LABEL: @fmuladd_f64(
 ;CHECK: llvm.fmuladd.v4f64
 ;CHECK: ret void
-define void @fmuladd_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
+define void @fmuladd_f64(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z, ptr noalias %w) nounwind uwtable {
 entry:
   %cmp12 = icmp sgt i32 %n, 0
   br i1 %cmp12, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
-  %arrayidx2 = getelementptr inbounds double, double* %w, i64 %indvars.iv
-  %1 = load double, double* %arrayidx2, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %z, i64 %indvars.iv
-  %2 = load double, double* %arrayidx4, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %w, i64 %indvars.iv
+  %1 = load double, ptr %arrayidx2, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %z, i64 %indvars.iv
+  %2 = load double, ptr %arrayidx4, align 8
   %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1)
-  %arrayidx6 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %3, double* %arrayidx6, align 8
+  %arrayidx6 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %3, ptr %arrayidx6, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -1007,20 +1007,20 @@ declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
 ;CHECK-LABEL: @pow_f32(
 ;CHECK: llvm.pow.v4f32
 ;CHECK: ret void
-define void @pow_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+define void @pow_f32(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z) nounwind uwtable {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %z, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %call = tail call float @llvm.pow.f32(float %0, float %1) nounwind readnone
-  %arrayidx4 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -1035,20 +1035,20 @@ declare float @llvm.pow.f32(float, float) nounwind readnone
 ;CHECK-LABEL: @pow_f64(
 ;CHECK: llvm.pow.v4f64
 ;CHECK: ret void
-define void @pow_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
+define void @pow_f64(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z) nounwind uwtable {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
-  %arrayidx2 = getelementptr inbounds double, double* %z, i64 %indvars.iv
-  %1 = load double, double* %arrayidx2, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %z, i64 %indvars.iv
+  %1 = load double, ptr %arrayidx2, align 8
   %call = tail call double @llvm.pow.f64(double %0, double %1) nounwind readnone
-  %arrayidx4 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx4, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx4, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -1061,16 +1061,16 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: fabs_libm
 ; CHECK:  call <4 x float> @llvm.fabs.v4f32
 ; CHECK: ret void
-define void @fabs_libm(float* nocapture %x) nounwind {
+define void @fabs_libm(ptr nocapture %x) nounwind {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @fabsf(float %0) nounwind readnone
-  store float %call, float* %arrayidx, align 4
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024
@@ -1095,16 +1095,16 @@ define internal float @roundf(float %x) nounwind readnone {
 ; CHECK-LABEL: internal_round
 ; CHECK-NOT:  load <4 x float>
 
-define void @internal_round(float* nocapture %x) nounwind {
+define void @internal_round(ptr nocapture %x) nounwind {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %call = tail call float @roundf(float %0) nounwind readnone
-  store float %call, float* %arrayidx, align 4
+  store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024
@@ -1122,15 +1122,15 @@ declare void @round(double %f)
 ; CHECK-LABEL: wrong_signature
 ; CHECK-NOT:  load <4 x double>
 
-define void @wrong_signature(double* nocapture %x) nounwind {
+define void @wrong_signature(ptr nocapture %x) nounwind {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 4
-  store double %0, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 4
+  store double %0, ptr %arrayidx, align 4
   tail call void @round(double %0) nounwind readnone
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -1146,18 +1146,18 @@ declare double @llvm.powi.f64.i32(double %Val, i32 %power) nounwind readnone
 ;CHECK-LABEL: @powi_f64(
 ;CHECK: llvm.powi.v4f64
 ;CHECK: ret void
-define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable {
+define void @powi_f64(i32 %n, ptr noalias %y, ptr noalias %x, i32 %P) nounwind uwtable {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %call = tail call double @llvm.powi.f64.i32(double %0, i32  %P) nounwind readnone
-  %arrayidx4 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx4, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx4, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -1170,19 +1170,19 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK-LABEL: @powi_f64_neg(
 ;CHECK-NOT: llvm.powi.v4f64
 ;CHECK: ret void
-define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+define void @powi_f64_neg(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %1 = trunc i64 %indvars.iv to i32
   %call = tail call double @llvm.powi.f64.i32(double %0, i32  %1) nounwind readnone
-  %arrayidx4 = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx4, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx4, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -1197,18 +1197,18 @@ declare i64  @llvm.cttz.i64 (i64, i1) nounwind readnone
 ;CHECK-LABEL: @cttz_f64(
 ;CHECK: llvm.cttz.v4i64
 ;CHECK: ret void
-define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
+define void @cttz_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %y, i64 %indvars.iv
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %y, i64 %indvars.iv
+  %0 = load i64, ptr %arrayidx, align 8
   %call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone
-  %arrayidx4 = getelementptr inbounds i64, i64* %x, i64 %indvars.iv
-  store i64 %call, i64* %arrayidx4, align 8
+  %arrayidx4 = getelementptr inbounds i64, ptr %x, i64 %indvars.iv
+  store i64 %call, ptr %arrayidx4, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -1223,18 +1223,18 @@ declare i64  @llvm.ctlz.i64 (i64, i1) nounwind readnone
 ;CHECK-LABEL: @ctlz_f64(
 ;CHECK: llvm.ctlz.v4i64
 ;CHECK: ret void
-define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
+define void @ctlz_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %y, i64 %indvars.iv
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %y, i64 %indvars.iv
+  %0 = load i64, ptr %arrayidx, align 8
   %call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone
-  %arrayidx4 = getelementptr inbounds i64, i64* %x, i64 %indvars.iv
-  store i64 %call, i64* %arrayidx4, align 8
+  %arrayidx4 = getelementptr inbounds i64, ptr %x, i64 %indvars.iv
+  store i64 %call, ptr %arrayidx4, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -1246,7 +1246,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare i64 @llvm.abs.i64 (i64, i1) nounwind readnone
 
-define void @abs_i64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
+define void @abs_i64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 ;CHECK-LABEL: @abs_i64(
 ;CHECK: llvm.abs.v4i64(<4 x i64> [[WIDE_LOADX:%.*]], i1 true)
 ;CHECK: ret void
@@ -1256,11 +1256,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %y, i64 %indvars.iv
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %y, i64 %indvars.iv
+  %0 = load i64, ptr %arrayidx, align 8
   %call = tail call i64 @llvm.abs.i64(i64 %0, i1 true) nounwind readnone
-  %arrayidx4 = getelementptr inbounds i64, i64* %x, i64 %indvars.iv
-  store i64 %call, i64* %arrayidx4, align 8
+  %arrayidx4 = getelementptr inbounds i64, ptr %x, i64 %indvars.iv
+  store i64 %call, ptr %arrayidx4, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -1272,7 +1272,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare i32 @llvm.smin.i32 (i32, i32)
 
-define void @smin_i32(i32 %n, i32* noalias %x, i32* noalias %y) {
+define void @smin_i32(i32 %n, ptr noalias %x, ptr noalias %y) {
 ; CHECK-LABEL: @smin_i32(
 ; CHECK:         call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[WIDE_LOADX:%.*]], <4 x i32> [[WIDE_LOADY:%.*]])
 ; CHECK:         ret void
@@ -1282,12 +1282,12 @@ entry:
 
 loop:
   %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
-  %xi = getelementptr inbounds i32, i32* %x, i32 %iv
-  %yi = getelementptr inbounds i32, i32* %y, i32 %iv
-  %xld = load i32, i32* %xi, align 4
-  %yld = load i32, i32* %yi, align 4
+  %xi = getelementptr inbounds i32, ptr %x, i32 %iv
+  %yi = getelementptr inbounds i32, ptr %y, i32 %iv
+  %xld = load i32, ptr %xi, align 4
+  %yld = load i32, ptr %yi, align 4
   %call = tail call i32 @llvm.smin.i32(i32 %xld, i32 %yld)
-  store i32 %call, i32* %xi, align 4
+  store i32 %call, ptr %xi, align 4
   %iv.next = add i32 %iv, 1
   %exitcond = icmp eq i32 %iv.next, %n
   br i1 %exitcond, label %end, label %loop
@@ -1298,7 +1298,7 @@ end:
 
 declare i32 @llvm.smax.i32 (i32, i32)
 
-define void @smax_i32(i32 %n, i32* noalias %x, i32* noalias %y) {
+define void @smax_i32(i32 %n, ptr noalias %x, ptr noalias %y) {
 ; CHECK-LABEL: @smax_i32(
 ; CHECK:         call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[WIDE_LOADX:%.*]], <4 x i32> [[WIDE_LOADY:%.*]])
 ; CHECK:         ret void
@@ -1308,12 +1308,12 @@ entry:
 
 loop:
   %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
-  %xi = getelementptr inbounds i32, i32* %x, i32 %iv
-  %yi = getelementptr inbounds i32, i32* %y, i32 %iv
-  %xld = load i32, i32* %xi, align 4
-  %yld = load i32, i32* %yi, align 4
+  %xi = getelementptr inbounds i32, ptr %x, i32 %iv
+  %yi = getelementptr inbounds i32, ptr %y, i32 %iv
+  %xld = load i32, ptr %xi, align 4
+  %yld = load i32, ptr %yi, align 4
   %call = tail call i32 @llvm.smax.i32(i32 %xld, i32 %yld)
-  store i32 %call, i32* %xi, align 4
+  store i32 %call, ptr %xi, align 4
   %iv.next = add i32 %iv, 1
   %exitcond = icmp eq i32 %iv.next, %n
   br i1 %exitcond, label %end, label %loop
@@ -1324,7 +1324,7 @@ end:
 
 declare i32 @llvm.umin.i32 (i32, i32)
 
-define void @umin_i32(i32 %n, i32* noalias %x, i32* noalias %y) {
+define void @umin_i32(i32 %n, ptr noalias %x, ptr noalias %y) {
 ; CHECK-LABEL: @umin_i32(
 ; CHECK:         call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[WIDE_LOADX:%.*]], <4 x i32> [[WIDE_LOADY:%.*]])
 ; CHECK:         ret void
@@ -1334,12 +1334,12 @@ entry:
 
 loop:
   %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
-  %xi = getelementptr inbounds i32, i32* %x, i32 %iv
-  %yi = getelementptr inbounds i32, i32* %y, i32 %iv
-  %xld = load i32, i32* %xi, align 4
-  %yld = load i32, i32* %yi, align 4
+  %xi = getelementptr inbounds i32, ptr %x, i32 %iv
+  %yi = getelementptr inbounds i32, ptr %y, i32 %iv
+  %xld = load i32, ptr %xi, align 4
+  %yld = load i32, ptr %yi, align 4
   %call = tail call i32 @llvm.umin.i32(i32 %xld, i32 %yld)
-  store i32 %call, i32* %xi, align 4
+  store i32 %call, ptr %xi, align 4
   %iv.next = add i32 %iv, 1
   %exitcond = icmp eq i32 %iv.next, %n
   br i1 %exitcond, label %end, label %loop
@@ -1350,7 +1350,7 @@ end:
 
 declare i32 @llvm.umax.i32 (i32, i32)
 
-define void @umax_i32(i32 %n, i32* noalias %x, i32* noalias %y) {
+define void @umax_i32(i32 %n, ptr noalias %x, ptr noalias %y) {
 ; CHECK-LABEL: @umax_i32(
 ; CHECK:         call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[WIDE_LOADX:%.*]], <4 x i32> [[WIDE_LOADY:%.*]])
 ; CHECK:         ret void
@@ -1360,12 +1360,12 @@ entry:
 
 loop:
   %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
-  %xi = getelementptr inbounds i32, i32* %x, i32 %iv
-  %yi = getelementptr inbounds i32, i32* %y, i32 %iv
-  %xld = load i32, i32* %xi, align 4
-  %yld = load i32, i32* %yi, align 4
+  %xi = getelementptr inbounds i32, ptr %x, i32 %iv
+  %yi = getelementptr inbounds i32, ptr %y, i32 %iv
+  %xld = load i32, ptr %xi, align 4
+  %yld = load i32, ptr %yi, align 4
   %call = tail call i32 @llvm.umax.i32(i32 %xld, i32 %yld)
-  store i32 %call, i32* %xi, align 4
+  store i32 %call, ptr %xi, align 4
   %iv.next = add i32 %iv, 1
   %exitcond = icmp eq i32 %iv.next, %n
   br i1 %exitcond, label %end, label %loop
@@ -1376,7 +1376,7 @@ end:
 
 declare i32 @llvm.fshl.i32 (i32, i32, i32)
 
-define void @fshl_i32(i32 %n, i32* noalias %x, i32* noalias %y, i32 %shAmt) {
+define void @fshl_i32(i32 %n, ptr noalias %x, ptr noalias %y, i32 %shAmt) {
 ; CHECK-LABEL: @fshl_i32(
 ; CHECK:         call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[WIDE_LOADX:%.*]], <4 x i32> [[WIDE_LOADY:%.*]], <4 x i32> [[SPLAT:%.*]])
 ; CHECK:         ret void
@@ -1386,12 +1386,12 @@ entry:
 
 loop:
   %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
-  %xi = getelementptr inbounds i32, i32* %x, i32 %iv
-  %yi = getelementptr inbounds i32, i32* %y, i32 %iv
-  %xld = load i32, i32* %xi, align 4
-  %yld = load i32, i32* %yi, align 4
+  %xi = getelementptr inbounds i32, ptr %x, i32 %iv
+  %yi = getelementptr inbounds i32, ptr %y, i32 %iv
+  %xld = load i32, ptr %xi, align 4
+  %yld = load i32, ptr %yi, align 4
   %call = tail call i32 @llvm.fshl.i32(i32 %xld, i32 %yld, i32 %shAmt)
-  store i32 %call, i32* %xi, align 4
+  store i32 %call, ptr %xi, align 4
   %iv.next = add i32 %iv, 1
   %exitcond = icmp eq i32 %iv.next, %n
   br i1 %exitcond, label %end, label %loop
@@ -1402,7 +1402,7 @@ end:
 
 declare i32 @llvm.fshr.i32 (i32, i32, i32)
 
-define void @fshr_i32(i32 %n, i32* noalias %x, i32* noalias %y, i32 %shAmt) {
+define void @fshr_i32(i32 %n, ptr noalias %x, ptr noalias %y, i32 %shAmt) {
 ; CHECK-LABEL: @fshr_i32(
 ; CHECK:         call <4 x i32> @llvm.fshr.v4i32(<4 x i32> [[WIDE_LOADX:%.*]], <4 x i32> [[WIDE_LOADY:%.*]], <4 x i32> [[SPLAT:%.*]])
 ; CHECK:         ret void
@@ -1412,12 +1412,12 @@ entry:
 
 loop:
   %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
-  %xi = getelementptr inbounds i32, i32* %x, i32 %iv
-  %yi = getelementptr inbounds i32, i32* %y, i32 %iv
-  %xld = load i32, i32* %xi, align 4
-  %yld = load i32, i32* %yi, align 4
+  %xi = getelementptr inbounds i32, ptr %x, i32 %iv
+  %yi = getelementptr inbounds i32, ptr %y, i32 %iv
+  %xld = load i32, ptr %xi, align 4
+  %yld = load i32, ptr %yi, align 4
   %call = tail call i32 @llvm.fshr.i32(i32 %xld, i32 %yld, i32 %shAmt)
-  store i32 %call, i32* %xi, align 4
+  store i32 %call, ptr %xi, align 4
   %iv.next = add i32 %iv, 1
   %exitcond = icmp eq i32 %iv.next, %n
   br i1 %exitcond, label %end, label %loop
@@ -1431,20 +1431,20 @@ declare float @llvm.minnum.f32(float, float) nounwind readnone
 ;CHECK-LABEL: @minnum_f32(
 ;CHECK: llvm.minnum.v4f32
 ;CHECK: ret void
-define void @minnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+define void @minnum_f32(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z) nounwind uwtable {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %z, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %call = tail call float @llvm.minnum.f32(float %0, float %1) nounwind readnone
-  %arrayidx4 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -1459,20 +1459,20 @@ declare float @llvm.maxnum.f32(float, float) nounwind readnone
 ;CHECK-LABEL: @maxnum_f32(
 ;CHECK: llvm.maxnum.v4f32
 ;CHECK: ret void
-define void @maxnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+define void @maxnum_f32(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z) nounwind uwtable {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %z, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %call = tail call float @llvm.maxnum.f32(float %0, float %1) nounwind readnone
-  %arrayidx4 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -1487,20 +1487,20 @@ declare float @llvm.minimum.f32(float, float) nounwind readnone
 ;CHECK-LABEL: @minimum_f32(
 ;CHECK: llvm.minimum.v4f32
 ;CHECK: ret void
-define void @minimum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+define void @minimum_f32(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z) nounwind uwtable {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %z, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %call = tail call float @llvm.minimum.f32(float %0, float %1) nounwind readnone
-  %arrayidx4 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -1515,20 +1515,20 @@ declare float @llvm.maximum.f32(float, float) nounwind readnone
 ;CHECK-LABEL: @maximum_f32(
 ;CHECK: llvm.maximum.v4f32
 ;CHECK: ret void
-define void @maximum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+define void @maximum_f32(i32 %n, ptr noalias %y, ptr noalias %x, ptr noalias %z) nounwind uwtable {
 entry:
   %cmp9 = icmp sgt i32 %n, 0
   br i1 %cmp9, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %z, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %call = tail call float @llvm.maximum.f32(float %0, float %1) nounwind readnone
-  %arrayidx4 = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/irregular_type.ll b/llvm/test/Transforms/LoopVectorize/irregular_type.ll
index d2a3ed774d683..f9326c49f5508 100644
--- a/llvm/test/Transforms/LoopVectorize/irregular_type.ll
+++ b/llvm/test/Transforms/LoopVectorize/irregular_type.ll
@@ -8,16 +8,16 @@
 ; CHECK-NOT: load <4 x i7>
 ; CHECK-NOT: store <4 x i7>
 ; CHECK: for.body
-define void @foo(i7* %a, i64 %n) {
+define void @foo(ptr %a, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i7, i7* %a, i64 %indvars.iv
-  %0 = load i7, i7* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i7, ptr %a, i64 %indvars.iv
+  %0 = load i7, ptr %arrayidx, align 1
   %sub = add nuw nsw i7 %0, 0
-  store i7 %sub, i7* %arrayidx, align 1
+  store i7 %sub, ptr %arrayidx, align 1
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %cmp = icmp eq i64 %indvars.iv.next, %n
   br i1 %cmp, label %for.exit, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/libcall-remark.ll b/llvm/test/Transforms/LoopVectorize/libcall-remark.ll
index 1443015399c20..660dc6f19ee1a 100644
--- a/llvm/test/Transforms/LoopVectorize/libcall-remark.ll
+++ b/llvm/test/Transforms/LoopVectorize/libcall-remark.ll
@@ -18,10 +18,10 @@ for.cond.cleanup:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds [32768 x float], [32768 x float]* @data, i64 0, i64 %indvars.iv
-  %t0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [32768 x float], ptr @data, i64 0, i64 %indvars.iv
+  %t0 = load float, ptr %arrayidx, align 4
   %sqrtf = tail call float @sqrtf(float %t0)
-  store float %sqrtf, float* %arrayidx, align 4
+  store float %sqrtf, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 32768
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -38,10 +38,10 @@ for.cond.cleanup:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds [32768 x float], [32768 x float]* @data, i64 0, i64 %indvars.iv
-  %t0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [32768 x float], ptr @data, i64 0, i64 %indvars.iv
+  %t0 = load float, ptr %arrayidx, align 4
   %sqrtf = tail call float @arbitrary(float %t0)
-  store float %sqrtf, float* %arrayidx, align 4
+  store float %sqrtf, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 32768
   br i1 %exitcond, label %for.cond.cleanup, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/lifetime.ll b/llvm/test/Transforms/LoopVectorize/lifetime.ll
index e441f8b12bf22..3dd41b57d863b 100644
--- a/llvm/test/Transforms/LoopVectorize/lifetime.ll
+++ b/llvm/test/Transforms/LoopVectorize/lifetime.ll
@@ -9,27 +9,26 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK: store <2 x i32>
 ; CHECK: call void @llvm.lifetime.start
 
-define void @test(i32 *%d) {
+define void @test(ptr %d) {
 entry:
   %arr = alloca [1024 x i32], align 16
-  %0 = bitcast [1024 x i32]* %arr to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4096, i8* %0) #1
+  call void @llvm.lifetime.start.p0(i64 4096, ptr %arr) #1
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  call void @llvm.lifetime.end.p0i8(i64 4096, i8* %0) #1
-  %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx, align 8
-  store i32 100, i32* %arrayidx, align 8
-  call void @llvm.lifetime.start.p0i8(i64 4096, i8* %0) #1
+  call void @llvm.lifetime.end.p0(i64 4096, ptr %arr) #1
+  %arrayidx = getelementptr inbounds i32, ptr %d, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 8
+  store i32 100, ptr %arrayidx, align 8
+  call void @llvm.lifetime.start.p0(i64 4096, ptr %arr) #1
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 128
   br i1 %exitcond, label %for.body, label %for.end
 
 for.end:
-  call void @llvm.lifetime.end.p0i8(i64 4096, i8* %0) #1
+  call void @llvm.lifetime.end.p0(i64 4096, ptr %arr) #1
   ret void
 }
 
@@ -38,31 +37,29 @@ for.end:
 ; CHECK: store <2 x i32>
 ; CHECK: call void @llvm.lifetime.start
 
-define void @testbitcast(i32 *%d) {
+define void @testbitcast(ptr %d) {
 entry:
   %arr = alloca [1024 x i32], align 16
-  %0 = bitcast [1024 x i32]* %arr to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4096, i8* %0) #1
+  call void @llvm.lifetime.start.p0(i64 4096, ptr %arr) #1
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %1 = bitcast [1024 x i32]* %arr to i8*
-  call void @llvm.lifetime.end.p0i8(i64 4096, i8* %1) #1
-  %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx, align 8
-  store i32 100, i32* %arrayidx, align 8
-  call void @llvm.lifetime.start.p0i8(i64 4096, i8* %1) #1
+  call void @llvm.lifetime.end.p0(i64 4096, ptr %arr) #1
+  %arrayidx = getelementptr inbounds i32, ptr %d, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 8
+  store i32 100, ptr %arrayidx, align 8
+  call void @llvm.lifetime.start.p0(i64 4096, ptr %arr) #1
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 128
   br i1 %exitcond, label %for.body, label %for.end
 
 for.end:
-  call void @llvm.lifetime.end.p0i8(i64 4096, i8* %0) #1
+  call void @llvm.lifetime.end.p0(i64 4096, ptr %arr) #1
   ret void
 }
 
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1
 
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1

diff  --git a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
index 21ad858780b07..7aa8efa566b99 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
@@ -5,7 +5,7 @@
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 
 ;
-define void @vector_gep(i32** %a, i32 *%b, i64 %n) {
+define void @vector_gep(ptr %a, ptr %b, i64 %n) {
 ; CHECK-LABEL: @vector_gep(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
@@ -17,10 +17,9 @@ define void @vector_gep(i32** %a, i32 *%b, i64 %n) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], <2 x i64> [[VEC_IND]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32*, i32** [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32** [[TMP1]] to <2 x i32*>*
-; CHECK-NEXT:    store <2 x i32*> [[TMP0]], <2 x i32*>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], <2 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <2 x ptr> [[TMP0]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -33,9 +32,9 @@ define void @vector_gep(i32** %a, i32 *%b, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]]
-; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[I]]
-; CHECK-NEXT:    store i32* [[VAR0]], i32** [[VAR1]], align 8
+; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    store ptr [[VAR0]], ptr [[VAR1]], align 8
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -47,9 +46,9 @@ entry:
 
 for.body:
   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
-  %var0 = getelementptr inbounds i32, i32* %b, i64 %i
-  %var1 = getelementptr inbounds i32*, i32** %a, i64 %i
-  store i32* %var0, i32** %var1, align 8
+  %var0 = getelementptr inbounds i32, ptr %b, i64 %i
+  %var1 = getelementptr inbounds ptr, ptr %a, i64 %i
+  store ptr %var0, ptr %var1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -59,7 +58,7 @@ for.end:
 }
 
 ;
-define void @scalar_store(i32** %a, i32 *%b, i64 %n) {
+define void @scalar_store(ptr %a, ptr %b, i64 %n) {
 ; CHECK-LABEL: @scalar_store(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2)
@@ -76,12 +75,12 @@ define void @scalar_store(i32** %a, i32 *%b, i64 %n) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32*, i32** [[A:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[TMP3]]
-; CHECK-NEXT:    store i32* [[TMP4]], i32** [[TMP6]], align 8
-; CHECK-NEXT:    store i32* [[TMP5]], i32** [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT:    store ptr [[TMP4]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    store ptr [[TMP5]], ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -93,9 +92,9 @@ define void @scalar_store(i32** %a, i32 *%b, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]]
-; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[I]]
-; CHECK-NEXT:    store i32* [[VAR0]], i32** [[VAR1]], align 8
+; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    store ptr [[VAR0]], ptr [[VAR1]], align 8
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -107,9 +106,9 @@ entry:
 
 for.body:
   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
-  %var0 = getelementptr inbounds i32, i32* %b, i64 %i
-  %var1 = getelementptr inbounds i32*, i32** %a, i64 %i
-  store i32* %var0, i32** %var1, align 8
+  %var0 = getelementptr inbounds i32, ptr %b, i64 %i
+  %var1 = getelementptr inbounds ptr, ptr %a, i64 %i
+  store ptr %var0, ptr %var1, align 8
   %i.next = add nuw nsw i64 %i, 2
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -119,7 +118,7 @@ for.end:
 }
 
 ;
-define void @expansion(i32** %a, i64 *%b, i64 %n) {
+define void @expansion(ptr %a, ptr %b, i64 %n) {
 ; CHECK-LABEL: @expansion(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2)
@@ -136,14 +135,12 @@ define void @expansion(i32** %a, i64 *%b, i64 %n) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32*, i32** [[A:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32** [[TMP6]] to i64**
-; CHECK-NEXT:    store i64* [[TMP4]], i64** [[TMP8]], align 8
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32** [[TMP7]] to i64**
-; CHECK-NEXT:    store i64* [[TMP5]], i64** [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT:    store ptr [[TMP4]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    store ptr [[TMP5]], ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -155,10 +152,9 @@ define void @expansion(i32** %a, i64 *%b, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[I]]
-; CHECK-NEXT:    [[VAR3:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[I]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32** [[VAR3]] to i64**
-; CHECK-NEXT:    store i64* [[VAR0]], i64** [[TMP11]], align 8
+; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[VAR3:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    store ptr [[VAR0]], ptr [[VAR3]], align 8
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -170,11 +166,9 @@ entry:
 
 for.body:
   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
-  %var0 = getelementptr inbounds i64, i64* %b, i64 %i
-  %var1 = bitcast i64* %var0 to i32*
-  %var2 = getelementptr inbounds i32*, i32** %a, i64 0
-  %var3 = getelementptr inbounds i32*, i32** %var2, i64 %i
-  store i32* %var1, i32** %var3, align 8
+  %var0 = getelementptr inbounds i64, ptr %b, i64 %i
+  %var3 = getelementptr inbounds ptr, ptr %a, i64 %i
+  store ptr %var0, ptr %var3, align 8
   %i.next = add nuw nsw i64 %i, 2
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -184,7 +178,7 @@ for.end:
 }
 
 ;
-define void @no_gep_or_bitcast(i32** noalias %a, i64 %n) {
+define void @no_gep_or_bitcast(ptr noalias %a, i64 %n) {
 ; CHECK-LABEL: @no_gep_or_bitcast(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
@@ -195,13 +189,12 @@ define void @no_gep_or_bitcast(i32** noalias %a, i64 %n) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32*, i32** [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32** [[TMP0]] to <2 x i32*>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32*> [[WIDE_LOAD]], i64 0
-; CHECK-NEXT:    store i32 0, i32* [[TMP2]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32*> [[WIDE_LOAD]], i64 1
-; CHECK-NEXT:    store i32 0, i32* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 0
+; CHECK-NEXT:    store i32 0, ptr [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 1
+; CHECK-NEXT:    store i32 0, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -213,9 +206,9 @@ define void @no_gep_or_bitcast(i32** noalias %a, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[I]]
-; CHECK-NEXT:    [[VAR1:%.*]] = load i32*, i32** [[VAR0]], align 8
-; CHECK-NEXT:    store i32 0, i32* [[VAR1]], align 8
+; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    [[VAR1:%.*]] = load ptr, ptr [[VAR0]], align 8
+; CHECK-NEXT:    store i32 0, ptr [[VAR1]], align 8
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]]
@@ -227,9 +220,9 @@ entry:
 
 for.body:
   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
-  %var0 = getelementptr inbounds i32*, i32** %a, i64 %i
-  %var1 = load i32*, i32** %var0, align 8
-  store i32 0, i32* %var1, align 8
+  %var0 = getelementptr inbounds ptr, ptr %a, i64 %i
+  %var1 = load ptr, ptr %var0, align 8
+  store i32 0, ptr %var1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end

diff  --git a/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll b/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll
index 90e601b834c0e..cd4d63be4d1a8 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll
@@ -4,20 +4,20 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; REQUIRES: asserts
 ; CHECK: LV: Can't vectorize due to memory conflicts
 
-define void @test_loop_novect(double** %arr, i64 %n) {
+define void @test_loop_novect(ptr %arr, i64 %n) {
 for.body.lr.ph:
-  %t = load double*, double** %arr, align 8
+  %t = load ptr, ptr %arr, align 8
   br label %for.body
 
 for.body:                                      ; preds = %for.body, %for.body.lr.ph
   %i = phi i64 [ 0, %for.body.lr.ph ], [ %i.next, %for.body ]
-  %a = getelementptr inbounds double, double* %t, i64 %i
+  %a = getelementptr inbounds double, ptr %t, i64 %i
   %i.next = add nuw nsw i64 %i, 1
-  %a.next = getelementptr inbounds double, double* %t, i64 %i.next
-  %t1 = load double, double* %a, align 8
-  %t2 = load double, double* %a.next, align 8
-  store double %t1, double* %a.next, align 8
-  store double %t2, double* %a, align 8
+  %a.next = getelementptr inbounds double, ptr %t, i64 %i.next
+  %t1 = load double, ptr %a, align 8
+  %t2 = load double, ptr %a.next, align 8
+  store double %t1, ptr %a.next, align 8
+  store double %t2, ptr %a, align 8
   %c = icmp eq i64 %i, %n
   br i1 %c, label %final, label %for.body
 

diff  --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
index 278e37f430bea..3e4179bb75b19 100644
--- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
@@ -35,16 +35,16 @@ define void @maxvf3() {
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[TMP2]]
-; CHECK-NEXT:    store i8 69, i8* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP2]]
+; CHECK-NEXT:    store i8 69, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
 ; CHECK:       pred.store.if1:
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[TMP5]]
-; CHECK-NEXT:    store i8 69, i8* [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP5]]
+; CHECK-NEXT:    store i8 69, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.continue2:
 ; CHECK-NEXT:    [[TMP7:%.*]] = add nuw nsw <2 x i32> <i32 3, i32 3>, [[VEC_IND]]
@@ -52,16 +52,16 @@ define void @maxvf3() {
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; CHECK:       pred.store.if3:
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i32 0
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[TMP9]]
-; CHECK-NEXT:    store i8 7, i8* [[TMP10]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP9]]
+; CHECK-NEXT:    store i8 7, ptr [[TMP10]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; CHECK:       pred.store.continue4:
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.if5:
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[TMP12]]
-; CHECK-NEXT:    store i8 7, i8* [[TMP13]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP12]]
+; CHECK-NEXT:    store i8 7, ptr [[TMP13]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
@@ -75,11 +75,11 @@ define void @maxvf3() {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[J:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[J_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[AJ:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[J]]
-; CHECK-NEXT:    store i8 69, i8* [[AJ]], align 8
+; CHECK-NEXT:    [[AJ:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[J]]
+; CHECK-NEXT:    store i8 69, ptr [[AJ]], align 8
 ; CHECK-NEXT:    [[JP3:%.*]] = add nuw nsw i32 3, [[J]]
-; CHECK-NEXT:    [[AJP3:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[JP3]]
-; CHECK-NEXT:    store i8 7, i8* [[AJP3]], align 8
+; CHECK-NEXT:    [[AJP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[JP3]]
+; CHECK-NEXT:    store i8 7, ptr [[AJP3]], align 8
 ; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i32 [[J]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[J_NEXT]], 15
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2
@@ -91,11 +91,11 @@ entry:
 
 for.body:
   %j = phi i32 [ 0, %entry ], [ %j.next, %for.body ]
-  %aj = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 %j
-  store i8 69, i8* %aj, align 8
+  %aj = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 %j
+  store i8 69, ptr %aj, align 8
   %jp3 = add nuw nsw i32 3, %j
-  %ajp3 = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 %jp3
-  store i8 7, i8* %ajp3, align 8
+  %ajp3 = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 %jp3
+  store i8 7, ptr %ajp3, align 8
   %j.next = add nuw nsw i32 %j, 1
   %exitcond = icmp eq i32 %j.next, 15
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0

diff  --git a/llvm/test/Transforms/LoopVectorize/memdep.ll b/llvm/test/Transforms/LoopVectorize/memdep.ll
index 16a1fd77e47d7..b891b4312f18d 100644
--- a/llvm/test/Transforms/LoopVectorize/memdep.ll
+++ b/llvm/test/Transforms/LoopVectorize/memdep.ll
@@ -14,18 +14,18 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK-LABEL: @f1_vec(
 ; CHECK: <2 x i32>
 
-define void @f1_vec(i32* %A) {
+define void @f1_vec(ptr %A) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %indvars.iv.next = add i32 %indvars.iv, 1
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv.next
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv.next
+  %0 = load i32, ptr %arrayidx, align 4
   %add1 = add nsw i32 %0, 1
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  store i32 %add1, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  store i32 %add1, ptr %arrayidx3, align 4
   %exitcond = icmp ne i32 %indvars.iv.next, 1024
   br i1 %exitcond, label %for.body, label %for.end
 
@@ -40,18 +40,18 @@ for.end:
 ; CHECK-LABEL: @f2_novec(
 ; CHECK-NOT: <2 x i32>
 
-define void @f2_novec(i32* %A) {
+define void @f2_novec(ptr %A) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add nsw i32 %0, 1
   %indvars.iv.next = add i32 %indvars.iv, 1
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv.next
-  store i32 %add, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %A, i32 %indvars.iv.next
+  store i32 %add, ptr %arrayidx3, align 4
   %exitcond = icmp ne i32 %indvars.iv.next, 1024
   br i1 %exitcond, label %for.body, label %for.end
 
@@ -69,20 +69,20 @@ for.end:
 ; WIDTH: f3_vec_len
 ; WIDTH-NOT: <4 x i32>
 
-define void @f3_vec_len(i32* %A) {
+define void @f3_vec_len(ptr %A) {
 entry:
   br label %for.body
 
 for.body:
   %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %idxprom = sext i32 %i.01 to i64
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add nsw i32 %0, 1
   %add1 = add nsw i32 %i.01, 2
   %idxprom2 = sext i32 %add1 to i64
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %idxprom2
-  store i32 %add, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom2
+  store i32 %add, ptr %arrayidx3, align 4
   %inc = add nsw i32 %i.01, 1
   %cmp = icmp slt i32 %inc, 1024
   br i1 %cmp, label %for.body, label %for.end
@@ -101,20 +101,20 @@ for.end:
 ; CHECK-LABEL: @f5(
 ; CHECK-NOT: <2 x i32>
 
-define void @f5(i32*  %A, i32* %B) {
+define void @f5(ptr  %A, ptr %B) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  store i32 %0, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  store i32 %0, ptr %arrayidx2, align 4
   %indvars.iv.next = add nsw i64 %indvars.iv, 1
-  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv.next
-  %1 = load i32, i32* %arrayidx4, align 4
-  store i32 %1, i32* %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv.next
+  %1 = load i32, ptr %arrayidx4, align 4
+  store i32 %1, ptr %arrayidx, align 4
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 1024
   br i1 %exitcond, label %for.body, label %for.end
@@ -132,7 +132,7 @@ for.end:
 ; CHECK-LABEL: @f6
 ; CHECK-NOT: <2 x i32>
 
-define i32 @f6(i32* %a, i32 %tmp) {
+define i32 @f6(ptr %a, i32 %tmp) {
 entry:
   br label %for.body
 
@@ -140,10 +140,10 @@ for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %tmp.addr.08 = phi i32 [ %tmp, %entry ], [ %0, %for.body ]
   %indvars.iv.next = add nsw i64 %indvars.iv, 1
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-  store i32 %tmp.addr.08, i32* %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx3, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.next
+  store i32 %tmp.addr.08, ptr %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx3, align 4
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 1024
   br i1 %exitcond, label %for.body, label %for.end
@@ -164,21 +164,21 @@ for.end:
 ; CHECK-LABEL: @nostoreloadforward(
 ; CHECK-NOT: <2 x i32>
 
-define void @nostoreloadforward(i32* %A) {
+define void @nostoreloadforward(ptr %A) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = add nsw i64 %indvars.iv, -3
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
-  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %0
+  %1 = load i32, ptr %arrayidx, align 4
   %2 = add nsw i64 %indvars.iv, 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %2
-  %3 = load i32, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %2
+  %3 = load i32, ptr %arrayidx2, align 4
   %add3 = add nsw i32 %3, %1
-  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  store i32 %add3, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  store i32 %add3, ptr %arrayidx5, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 128
@@ -199,21 +199,21 @@ for.end:
 ; CHECK-LABEL: @nostoreloadforward2(
 ; CHECK-NOT: <2 x i32>
 
-define void @nostoreloadforward2(i32* noalias %A, i32* noalias %B, i32* noalias %C) {
+define void @nostoreloadforward2(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  store i32 %0, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  store i32 %0, ptr %arrayidx2, align 4
   %1 = add nsw i64 %indvars.iv, -3
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %1
-  %2 = load i32, i32* %arrayidx4, align 4
-  %arrayidx6 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
-  store i32 %2, i32* %arrayidx6, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 %1
+  %2 = load i32, ptr %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
+  store i32 %2, ptr %arrayidx6, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 128
@@ -258,11 +258,11 @@ for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = shl i64 %indvars.iv, 1
   %1 = sub nuw nsw i64 64, %0
-  %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* @a, i64 0, i64 %1
-  store i32 69, i32* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds [64 x i32], ptr @a, i64 0, i64 %1
+  store i32 69, ptr %arrayidx, align 8
   %2 = sub nuw nsw i64 52, %0
-  %arrayidx4 = getelementptr inbounds [64 x i32], [64 x i32]* @a, i64 0, i64 %2
-  store i32 7, i32* %arrayidx4, align 8
+  %arrayidx4 = getelementptr inbounds [64 x i32], ptr @a, i64 0, i64 %2
+  store i32 7, ptr %arrayidx4, align 8
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 26
   br i1 %exitcond, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll b/llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll
index 914a27d1ee997..151a882dfb814 100644
--- a/llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll
+++ b/llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll
@@ -14,21 +14,21 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 ; CHECK: remark: source.c:4:16: loop not vectorized: cannot identify array bounds
 
-define void @test_unknown_bounds(i64 %n, i32* nocapture %A, i32* nocapture readonly %B) !dbg !13 {
+define void @test_unknown_bounds(i64 %n, ptr nocapture %A, ptr nocapture readonly %B) !dbg !13 {
 entry:
   %cmp10 = icmp sgt i64 %n, 0
   br i1 %cmp10, label %for.body, label %for.cond.cleanup
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %idxprom1 = sext i32 %0 to i64, !dbg !35
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
-  %1 = load i32, i32* %arrayidx2, align 4, !dbg !35
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %idxprom1, !dbg !35
+  %1 = load i32, ptr %arrayidx2, align 4, !dbg !35
   %add = add nsw i32 %1, 1
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !28
@@ -47,7 +47,7 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 ; CHECK-NOT: remark: source.c:{{0-9]+}}:{{[0-9]+}}:
 
-define void @test_nodep(i64 %n, i32* nocapture readonly %A, i32* nocapture %B) !dbg !44 {
+define void @test_nodep(i64 %n, ptr nocapture readonly %A, ptr nocapture %B) !dbg !44 {
 entry:
   %cmp12 = icmp sgt i64 %n, 1
   br i1 %cmp12, label %for.body, label %for.cond.cleanup
@@ -55,14 +55,14 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = add nsw i64 %indvars.iv, -1
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !61
-  %1 = load i32, i32* %arrayidx, align 4, !dbg !61
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %0, !dbg !61
+  %1 = load i32, ptr %arrayidx, align 4, !dbg !61
   %2 = add nuw nsw i64 %indvars.iv, 2
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %2, !dbg !63
-  %3 = load i32, i32* %arrayidx2, align 4, !dbg !63
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %2, !dbg !63
+  %3 = load i32, ptr %arrayidx2, align 4, !dbg !63
   %add3 = add nsw i32 %3, %1
-  %arrayidx5 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  store i32 %add3, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  store i32 %add3, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
@@ -83,20 +83,20 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 ; }
 
 ; CHECK-NOT: remark: source.c:{{0-9]+}}:{{[0-9]+}}:
-define dso_local void @test_forward(i64 %n, i32* nocapture %A, i32* nocapture %B) !dbg !70 {
+define dso_local void @test_forward(i64 %n, ptr nocapture %A, ptr nocapture %B) !dbg !70 {
 entry:
   %cmp11 = icmp sgt i64 %n, 1
   br i1 %cmp11, label %for.body, label %for.cond.cleanup, !dbg !81
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !83
-  store i32 10, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !83
+  store i32 10, ptr %arrayidx, align 4
   %0 = add nsw i64 %indvars.iv, -2
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !87
-  %1 = load i32, i32* %arrayidx2, align 4, !dbg !87
-  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !88
-  store i32 %1, i32* %arrayidx4, align 4, !dbg !89
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %0, !dbg !87
+  %1 = load i32, ptr %arrayidx2, align 4, !dbg !87
+  %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv, !dbg !88
+  store i32 %1, ptr %arrayidx4, align 4, !dbg !89
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !81
@@ -118,7 +118,7 @@ for.body:                                         ; preds = %entry, %for.body
 
 ; CHECK-NOT: remark: source.c:{{0-9]+}}:{{[0-9]+}}:
 
-define dso_local void @test_backwardVectorizable(i64 %n, i32* nocapture %A) !dbg !93 {
+define dso_local void @test_backwardVectorizable(i64 %n, ptr nocapture %A) !dbg !93 {
 entry:
   %cmp8 = icmp sgt i64 %n, 4
   br i1 %cmp8, label %for.body, label %for.cond.cleanup
@@ -126,11 +126,11 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = add nsw i64 %indvars.iv, -4, !dbg !106
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !108
-  %1 = load i32, i32* %arrayidx, align 4, !dbg !108
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %0, !dbg !108
+  %1 = load i32, ptr %arrayidx, align 4, !dbg !108
   %add = add nsw i32 %1, 1
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !110
-  store i32 %add, i32* %arrayidx2, align 4, !dbg !111
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !110
+  store i32 %add, ptr %arrayidx2, align 4, !dbg !111
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
@@ -152,7 +152,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: remark: source.c:48:14: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
 ; CHECK-NEXT: Backward loop carried data dependence. Memory location is the same as accessed at source.c:47:5
 
-define void @test_backward_dep(i64 %n, i32* nocapture %A) {
+define void @test_backward_dep(i64 %n, ptr nocapture %A) {
 entry:
   %cmp.not19 = icmp slt i64 %n, 4
   br i1 %cmp.not19, label %for.cond.cleanup, label %for.body.preheader
@@ -164,16 +164,16 @@ for.body.preheader:                               ; preds = %entry
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 1, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
   %0 = add nsw i64 %indvars.iv, -1
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
-  %1 = load i32, i32* %arrayidx, align 8
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !157
-  store i32 %1, i32* %arrayidx3, align 8
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %0
+  %1 = load i32, ptr %arrayidx, align 8
+  %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !157
+  store i32 %1, ptr %arrayidx3, align 8
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
-  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next, !dbg !160
-  %2 = load i32, i32* %arrayidx5, align 8, !dbg !160
+  %arrayidx5 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next, !dbg !160
+  %2 = load i32, ptr %arrayidx5, align 8, !dbg !160
   %3 = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %3
-  store i32 %2, i32* %arrayidx8, align 8
+  %arrayidx8 = getelementptr inbounds i32, ptr %A, i64 %3
+  store i32 %2, ptr %arrayidx8, align 8
   %cmp.not = icmp ugt i64 %indvars.iv.next, %n
   br i1 %cmp.not, label %for.cond.cleanup, label %for.body
 
@@ -197,20 +197,20 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: remark: source.c:61:12: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
 ; CHECK-NEXT: Forward loop carried data dependence that prevents store-to-load forwarding. Memory location is the same as accessed at source.c:60:5
 
-define void @test_forwardButPreventsForwarding_dep(i64 %n, i32* nocapture %A, i32* nocapture %B) !dbg !166 {
+define void @test_forwardButPreventsForwarding_dep(i64 %n, ptr nocapture %A, ptr nocapture %B) !dbg !166 {
 entry:
   %cmp11 = icmp sgt i64 %n, 3
   br i1 %cmp11, label %for.body, label %for.cond.cleanup
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 3, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !179
-  store i32 10, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !179
+  store i32 10, ptr %arrayidx, align 4
   %0 = add nsw i64 %indvars.iv, -3
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !183
-  %1 = load i32, i32* %arrayidx2, align 4, !dbg !183
-  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  store i32 %1, i32* %arrayidx4, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %0, !dbg !183
+  %1 = load i32, ptr %arrayidx2, align 4, !dbg !183
+  %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  store i32 %1, ptr %arrayidx4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
@@ -235,7 +235,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: remark: source.c:74:5: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
 ; CHECK: Backward loop carried data dependence that prevents store-to-load forwarding. Memory location is the same as accessed at source.c:74:21
 
-define void @test_backwardVectorizableButPreventsForwarding(i64 %n, i32* nocapture %A) !dbg !189 {
+define void @test_backwardVectorizableButPreventsForwarding(i64 %n, ptr nocapture %A) !dbg !189 {
 entry:
   %cmp13 = icmp sgt i64 %n, 15
   br i1 %cmp13, label %for.body, label %for.cond.cleanup
@@ -243,14 +243,14 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 15, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = add nsw i64 %indvars.iv, -2
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
-  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %0
+  %1 = load i32, ptr %arrayidx, align 4
   %2 = add nsw i64 %indvars.iv, -15
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %2, !dbg !207
-  %3 = load i32, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %2, !dbg !207
+  %3 = load i32, ptr %arrayidx3, align 4
   %add = add nsw i32 %3, %1
-  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !209
-  store i32 %add, i32* %arrayidx5, align 4, !dbg !209
+  %arrayidx5 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !209
+  store i32 %add, ptr %arrayidx5, align 4, !dbg !209
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
@@ -271,7 +271,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: remark: source.c:83:7: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
 ; CHECK: Unknown data dependence. Memory location is the same as accessed at source.c:82:7
 
-define void @test_unknown_dep(i64 %n, i32* nocapture %A) !dbg !214 {
+define void @test_unknown_dep(i64 %n, ptr nocapture %A) !dbg !214 {
 entry:
   %cmp8 = icmp sgt i64 %n, 0
   br i1 %cmp8, label %for.body, label %for.cond.cleanup
@@ -280,10 +280,10 @@ for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %0 = shl nsw i64 %indvars.iv.next, 2
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !229
-  store i32 10, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !231
-  store i32 100, i32* %arrayidx2, align 4, !dbg !231
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %0, !dbg !229
+  store i32 10, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !231
+  store i32 100, ptr %arrayidx2, align 4, !dbg !231
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 

diff  --git a/llvm/test/Transforms/LoopVectorize/metadata-unroll.ll b/llvm/test/Transforms/LoopVectorize/metadata-unroll.ll
index 2b9af6dabadbf..14f8b988b319a 100644
--- a/llvm/test/Transforms/LoopVectorize/metadata-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/metadata-unroll.ll
@@ -22,11 +22,11 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp {
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
   %4 = trunc i64 %indvars.iv to i32
   %5 = add nsw i32 %3, %4
-  store i32 %5, i32* %2, align 4
+  store i32 %5, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/metadata-width.ll b/llvm/test/Transforms/LoopVectorize/metadata-width.ll
index 0c3f78c335e86..ddf90294d5bfc 100644
--- a/llvm/test/Transforms/LoopVectorize/metadata-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/metadata-width.ll
@@ -5,15 +5,15 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK-LABEL: @test1(
 ; CHECK: store <8 x i32>
 ; CHECK: ret void
-define void @test1(i32* nocapture %a, i32 %n) #0 {
+define void @test1(ptr nocapture %a, i32 %n) #0 {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 42, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 42, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -26,15 +26,15 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK-LABEL: @test2(
 ; CHECK: store <vscale x 8 x i32>
 ; CHECK: ret void
-define void @test2(i32* nocapture %a, i32 %n) #0 {
+define void @test2(ptr nocapture %a, i32 %n) #0 {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 42, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 42, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -47,15 +47,15 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK-LABEL: @test3(
 ; CHECK: store <8 x i32>
 ; CHECK: ret void
-define void @test3(i32* nocapture %a, i32 %n) #0 {
+define void @test3(ptr nocapture %a, i32 %n) #0 {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 42, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 42, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -71,15 +71,15 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK-LABEL: @test4(
 ; CHECK: store <8 x i32>
 ; CHECK: ret void
-define void @test4(i32* nocapture %a, i32 %n) #0 {
+define void @test4(ptr nocapture %a, i32 %n) #0 {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 42, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 42, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/metadata.ll b/llvm/test/Transforms/LoopVectorize/metadata.ll
index cdfd2682f005c..0bd29e5236825 100644
--- a/llvm/test/Transforms/LoopVectorize/metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/metadata.ll
@@ -2,17 +2,17 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Function Attrs: nounwind uwtable
-define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) #0 {
+define i32 @test1(ptr nocapture %a, ptr nocapture readonly %b) #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4, !tbaa !0
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4, !tbaa !0
   %conv = fptosi float %0 to i32
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %conv, ptr %arrayidx2, align 4, !tbaa !4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1600
   br i1 %exitcond, label %for.end, label %for.body
@@ -22,8 +22,8 @@ for.end:                                          ; preds = %for.body
 }
 
 ; CHECK-LABEL: @test1
-; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa ![[TFLT:[0-9]+]]
-; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa ![[TINT:[0-9]+]]
+; CHECK: load <4 x float>, ptr %{{.*}}, align 4, !tbaa ![[TFLT:[0-9]+]]
+; CHECK: store <4 x i32> %{{.*}}, ptr %{{.*}}, align 4, !tbaa ![[TINT:[0-9]+]]
 ; CHECK: ret i32 0
 
 ; CHECK-DAG: ![[TFLT]] = !{![[TFLT1:[0-9]+]]

diff  --git a/llvm/test/Transforms/LoopVectorize/middle-block-dbg.ll b/llvm/test/Transforms/LoopVectorize/middle-block-dbg.ll
index 28882adfe0f1c..741ca559343d1 100644
--- a/llvm/test/Transforms/LoopVectorize/middle-block-dbg.ll
+++ b/llvm/test/Transforms/LoopVectorize/middle-block-dbg.ll
@@ -32,7 +32,7 @@ target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 
 define dso_local void @"?a@@YAXXZ"() local_unnamed_addr #0 !dbg !8 {
 entry:
-  %0 = load i32, i32* @"?x@@3HA", align 4, !dbg !23, !tbaa !24
+  %0 = load i32, ptr @"?x@@3HA", align 4, !dbg !23, !tbaa !24
   %1 = zext i32 %0 to i64, !dbg !28
   %vla = alloca i32, i64 %1, align 16, !dbg !28
   %cmp10 = icmp sgt i32 %0, 0, !dbg !30
@@ -43,24 +43,24 @@ for.body.preheader:
 
 for.cond.cleanup.loopexit:
   %idxprom1.phi.trans.insert = sext i32 %0 to i64
-  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %vla, i64 %idxprom1.phi.trans.insert
-  %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4, !dbg !33, !tbaa !24
+  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, ptr %vla, i64 %idxprom1.phi.trans.insert
+  %.pre = load i32, ptr %arrayidx2.phi.trans.insert, align 4, !dbg !33, !tbaa !24
   br label %for.cond.cleanup, !dbg !33
 
 for.cond.cleanup:
   %2 = phi i32 [ %.pre, %for.cond.cleanup.loopexit ], [ undef, %entry ], !dbg !33
   %sub = add nsw i32 %0, -5, !dbg !33
   %idxprom3 = sext i32 %sub to i64, !dbg !33
-  %arrayidx4 = getelementptr inbounds i32, i32* %vla, i64 %idxprom3, !dbg !33
-  %3 = load i32, i32* %arrayidx4, align 4, !dbg !33, !tbaa !24
+  %arrayidx4 = getelementptr inbounds i32, ptr %vla, i64 %idxprom3, !dbg !33
+  %3 = load i32, ptr %arrayidx4, align 4, !dbg !33, !tbaa !24
   %add = add nsw i32 %3, %2, !dbg !33
-  store i32 %add, i32* @"?y@@3HA", align 4, !dbg !33, !tbaa !24
+  store i32 %add, ptr @"?y@@3HA", align 4, !dbg !33, !tbaa !24
   ret void, !dbg !34
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %vla, i64 %indvars.iv, !dbg !31
-  store i32 %0, i32* %arrayidx, align 4, !dbg !31, !tbaa !24
+  %arrayidx = getelementptr inbounds i32, ptr %vla, i64 %indvars.iv, !dbg !31
+  store i32 %0, ptr %arrayidx, align 4, !dbg !31, !tbaa !24
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !35
   %exitcond = icmp eq i64 %indvars.iv.next, %1, !dbg !30
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !30, !llvm.loop !36

diff  --git a/llvm/test/Transforms/LoopVectorize/miniters.ll b/llvm/test/Transforms/LoopVectorize/miniters.ll
index 671f099eea689..0b4c002045186 100644
--- a/llvm/test/Transforms/LoopVectorize/miniters.ll
+++ b/llvm/test/Transforms/LoopVectorize/miniters.ll
@@ -25,13 +25,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body, %for.body.preheader
   %i.09 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @b, i64 0, i64 %i.09
-  %tmp = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds [1000 x i32], [1000 x i32]* @c, i64 0, i64 %i.09
-  %tmp1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds [1000 x i32], ptr @b, i64 0, i64 %i.09
+  %tmp = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds [1000 x i32], ptr @c, i64 0, i64 %i.09
+  %tmp1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %tmp1, %tmp
-  %arrayidx2 = getelementptr inbounds [1000 x i32], [1000 x i32]* @a, i64 0, i64 %i.09
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds [1000 x i32], ptr @a, i64 0, i64 %i.09
+  store i32 %add, ptr %arrayidx2, align 4
   %inc = add nuw nsw i64 %i.09, 1
   %exitcond = icmp eq i64 %inc, %N
   br i1 %exitcond, label %for.end.loopexit, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
index dc58da26d3338..2e0b27aeaa9f4 100644
--- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -25,8 +25,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp sgt i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -53,8 +53,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp slt i32 %max.red.08, %0
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -80,8 +80,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp slt i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -108,8 +108,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp sgt i32 %max.red.08, %0
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -137,8 +137,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp ugt i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -165,8 +165,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp ult i32 %max.red.08, %0
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -192,8 +192,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp ult i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -220,8 +220,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp ugt i32 %max.red.08, %0
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -248,8 +248,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp sge i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -276,8 +276,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp sle i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -304,8 +304,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp uge i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -332,8 +332,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp3 = icmp ule i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -355,10 +355,10 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %arrayidx1 = getelementptr inbounds [1024 x i32], ptr @A, i64 1, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %1 = load i32, ptr %arrayidx1, align 4
   %cmp3 = icmp sgt i32 %0, %1
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -379,10 +379,10 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %arrayidx1 = getelementptr inbounds [1024 x i32], ptr @A, i64 1, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %1 = load i32, ptr %arrayidx1, align 4
   %cmp3 = icmp sgt i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %1
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -412,8 +412,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ogt float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -437,8 +437,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast oge float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -462,8 +462,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast olt float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -487,8 +487,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ole float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -512,8 +512,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ugt float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -537,8 +537,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast uge float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -562,8 +562,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ult float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -587,8 +587,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ule float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -615,8 +615,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast olt float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -640,8 +640,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ole float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -665,8 +665,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ogt float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -690,8 +690,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast oge float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -715,8 +715,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ult float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -740,8 +740,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ule float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -765,8 +765,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ugt float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -790,8 +790,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast uge float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -816,8 +816,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x double], [1024 x double]* @dA, i64 0, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x double], ptr @dA, i64 0, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 4
   %cmp3 = fcmp fast olt double %0, %min.red.08
   %min.red.0 = select i1 %cmp3, double %0, double %min.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -840,8 +840,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ogt float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -863,8 +863,8 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp3 = fcmp fast ogt float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -878,15 +878,15 @@ for.end:
 ; CHECK-LABEL: @smin_intrinsic(
 ; CHECK: <2 x i32> @llvm.smin.v2i32
 ; CHECK: i32 @llvm.vector.reduce.smin.v2i32
-define i32 @smin_intrinsic(i32* nocapture readonly %x) {
+define i32 @smin_intrinsic(ptr nocapture readonly %x) {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.012
+  %0 = load i32, ptr %arrayidx, align 4
   %1 = tail call i32 @llvm.smin.i32(i32 %s.011, i32 %0)
   %inc = add nuw nsw i32 %i.012, 1
   %exitcond.not = icmp eq i32 %inc, 1024
@@ -899,15 +899,15 @@ for.cond.cleanup:                                 ; preds = %for.body
 ; CHECK-LABEL: @smax_intrinsic(
 ; CHECK: <2 x i32> @llvm.smax.v2i32
 ; CHECK: i32 @llvm.vector.reduce.smax.v2i32
-define i32 @smax_intrinsic(i32* nocapture readonly %x) {
+define i32 @smax_intrinsic(ptr nocapture readonly %x) {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.012
+  %0 = load i32, ptr %arrayidx, align 4
   %1 = tail call i32 @llvm.smax.i32(i32 %s.011, i32 %0)
   %inc = add nuw nsw i32 %i.012, 1
   %exitcond.not = icmp eq i32 %inc, 1024
@@ -920,15 +920,15 @@ for.cond.cleanup:                                 ; preds = %for.body
 ; CHECK-LABEL: @umin_intrinsic(
 ; CHECK: <2 x i32> @llvm.umin.v2i32
 ; CHECK: i32 @llvm.vector.reduce.umin.v2i32
-define i32 @umin_intrinsic(i32* nocapture readonly %x) {
+define i32 @umin_intrinsic(ptr nocapture readonly %x) {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.012
+  %0 = load i32, ptr %arrayidx, align 4
   %1 = tail call i32 @llvm.umin.i32(i32 %s.011, i32 %0)
   %inc = add nuw nsw i32 %i.012, 1
   %exitcond.not = icmp eq i32 %inc, 1024
@@ -941,15 +941,15 @@ for.cond.cleanup:                                 ; preds = %for.body
 ; CHECK-LABEL: @umax_intrinsic(
 ; CHECK: <2 x i32> @llvm.umax.v2i32
 ; CHECK: i32 @llvm.vector.reduce.umax.v2i32
-define i32 @umax_intrinsic(i32* nocapture readonly %x) {
+define i32 @umax_intrinsic(ptr nocapture readonly %x) {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.012
+  %0 = load i32, ptr %arrayidx, align 4
   %1 = tail call i32 @llvm.umax.i32(i32 %s.011, i32 %0)
   %inc = add nuw nsw i32 %i.012, 1
   %exitcond.not = icmp eq i32 %inc, 1024
@@ -962,7 +962,7 @@ for.cond.cleanup:                                 ; preds = %for.body
 ; CHECK-LABEL: @fmin_intrinsic(
 ; CHECK: nnan nsz <2 x float> @llvm.minnum.v2f32
 ; CHECK: nnan nsz float @llvm.vector.reduce.fmin.v2f32
-define float @fmin_intrinsic(float* nocapture readonly %x) {
+define float @fmin_intrinsic(ptr nocapture readonly %x) {
 entry:
   br label %for.body
 
@@ -972,8 +972,8 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i32 %i.012
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+  %0 = load float, ptr %arrayidx, align 4
   %1 = tail call nnan nsz float @llvm.minnum.f32(float %s.011, float %0)
   %inc = add nuw nsw i32 %i.012, 1
   %exitcond.not = icmp eq i32 %inc, 1024
@@ -983,7 +983,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK-LABEL: @fmax_intrinsic(
 ; CHECK: fast <2 x float> @llvm.maxnum.v2f32
 ; CHECK: fast float @llvm.vector.reduce.fmax.v2f32
-define float @fmax_intrinsic(float* nocapture readonly %x) {
+define float @fmax_intrinsic(ptr nocapture readonly %x) {
 entry:
   br label %for.body
 
@@ -993,8 +993,8 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i32 %i.012
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+  %0 = load float, ptr %arrayidx, align 4
   %1 = tail call fast float @llvm.maxnum.f32(float %s.011, float %0)
   %inc = add nuw nsw i32 %i.012, 1
   %exitcond.not = icmp eq i32 %inc, 1024
@@ -1003,7 +1003,7 @@ for.body:                                         ; preds = %entry, %for.body
 
 ; CHECK-LABEL: @fmin_intrinsic_nofast(
 ; CHECK-NOT: <2 x float> @llvm.minnum.v2f32
-define float @fmin_intrinsic_nofast(float* nocapture readonly %x) {
+define float @fmin_intrinsic_nofast(ptr nocapture readonly %x) {
 entry:
   br label %for.body
 
@@ -1013,8 +1013,8 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i32 %i.012
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+  %0 = load float, ptr %arrayidx, align 4
   %1 = tail call float @llvm.minnum.f32(float %s.011, float %0)
   %inc = add nuw nsw i32 %i.012, 1
   %exitcond.not = icmp eq i32 %inc, 1024
@@ -1023,7 +1023,7 @@ for.body:                                         ; preds = %entry, %for.body
 
 ; CHECK-LABEL: @fmax_intrinsic_nofast(
 ; CHECK-NOT: <2 x float> @llvm.maxnum.v2f32
-define float @fmax_intrinsic_nofast(float* nocapture readonly %x) {
+define float @fmax_intrinsic_nofast(ptr nocapture readonly %x) {
 entry:
   br label %for.body
 
@@ -1033,8 +1033,8 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %x, i32 %i.012
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+  %0 = load float, ptr %arrayidx, align 4
   %1 = tail call float @llvm.maxnum.f32(float %s.011, float %0)
   %inc = add nuw nsw i32 %i.012, 1
   %exitcond.not = icmp eq i32 %inc, 1024
@@ -1044,7 +1044,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK-LABEL: @sminmax(
 ; Min and max intrinsics - don't vectorize
 ; CHECK-NOT: <2 x i32>
-define i32 @sminmax(i32* nocapture readonly %x, i32* nocapture readonly %y) {
+define i32 @sminmax(ptr nocapture readonly %x, ptr nocapture readonly %y) {
 entry:
   br label %for.body
 
@@ -1054,11 +1054,11 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %entry, %for.body
   %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %s.024 = phi i32 [ 0, %entry ], [ %cond9, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.025
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.025
+  %0 = load i32, ptr %arrayidx, align 4
   %s.0. = tail call i32 @llvm.smin.i32(i32 %s.024, i32 %0)
-  %arrayidx3 = getelementptr inbounds i32, i32* %y, i32 %i.025
-  %1 = load i32, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %y, i32 %i.025
+  %1 = load i32, ptr %arrayidx3, align 4
   %cond9 = tail call i32 @llvm.smax.i32(i32 %s.0., i32 %1)
   %inc = add nuw nsw i32 %i.025, 1
   %exitcond.not = icmp eq i32 %inc, 1024
@@ -1069,7 +1069,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: <2 x i32> @llvm.smin.v2i32
 ; CHECK: <2 x i32> @llvm.smin.v2i32
 ; CHECK: i32 @llvm.vector.reduce.smin.v2i32
-define i32 @sminmin(i32* nocapture readonly %x, i32* nocapture readonly %y) {
+define i32 @sminmin(ptr nocapture readonly %x, ptr nocapture readonly %y) {
 entry:
   br label %for.body
 
@@ -1079,11 +1079,11 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %entry, %for.body
   %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %s.024 = phi i32 [ 0, %entry ], [ %cond9, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.025
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.025
+  %0 = load i32, ptr %arrayidx, align 4
   %s.0. = tail call i32 @llvm.smin.i32(i32 %s.024, i32 %0)
-  %arrayidx3 = getelementptr inbounds i32, i32* %y, i32 %i.025
-  %1 = load i32, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %y, i32 %i.025
+  %1 = load i32, ptr %arrayidx3, align 4
   %cond9 = tail call i32 @llvm.smin.i32(i32 %s.0., i32 %1)
   %inc = add nuw nsw i32 %i.025, 1
   %exitcond.not = icmp eq i32 %inc, 1024

diff --git a/llvm/test/Transforms/LoopVectorize/mixed-precision-remarks.ll b/llvm/test/Transforms/LoopVectorize/mixed-precision-remarks.ll
index cd42dbce8223e..ba6852418ef29 100644
--- a/llvm/test/Transforms/LoopVectorize/mixed-precision-remarks.ll
+++ b/llvm/test/Transforms/LoopVectorize/mixed-precision-remarks.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -force-vector-interleave=2 -force-vector-width=4 -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s
 
 ; CHECK: remark: mixed-precision.c:3:26: floating point conversion changes vector width. Mixed floating point precision requires an up/down cast that will negatively impact performance.
-define void @f(float* noalias nocapture %X, i64 %N) {
+define void @f(ptr noalias nocapture %X, i64 %N) {
 entry:
   br label %for.body
 
@@ -10,12 +10,12 @@ for.cond.cleanup:
 
 for.body:
   %i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %X, i64 %i
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %X, i64 %i
+  %0 = load float, ptr %arrayidx, align 4
   %conv = fpext float %0 to double, !dbg !9
   %mul = fmul double %conv, 0x3FD5555555555555
   %conv3 = fptrunc double %mul to float
-  store float %conv3, float* %arrayidx, align 4
+  store float %conv3, ptr %arrayidx, align 4
   %inc = add nuw i64 %i, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
@@ -24,27 +24,27 @@ for.body:
 ; CHECK: remark: mixed-precision.c:8:8: floating point conversion changes vector width. Mixed floating point precision requires an up/down cast that will negatively impact performance.
 ; CHECK: remark: mixed-precision.c:7:16: floating point conversion changes vector width. Mixed floating point precision requires an up/down cast that will negatively impact performance.
 ; CHECK-NOT: remark: mixed-precision.c:7:16: floating point conversion changes vector width. Mixed floating point precision requires an up/down cast that will negatively impact performance.
-define void @g(float* noalias nocapture %X, float* noalias nocapture %Y, i64 %N) {
+define void @g(ptr noalias nocapture %X, ptr noalias nocapture %Y, i64 %N) {
 entry:
   %pi = alloca double
-  store double 0x400921FB54442D18, double* %pi
-  %fac = load double, double* %pi
+  store double 0x400921FB54442D18, ptr %pi
+  %fac = load double, ptr %pi
   br label %for.body
 
 for.body:
   %i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %X, i64 %i
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %X, i64 %i
+  %0 = load float, ptr %arrayidx, align 4
   %conv = fpext float %0 to double, !dbg !10
   %mul = fmul double %conv, %fac
   %conv1 = fptrunc double %mul to float
-  store float %conv1, float* %arrayidx, align 4
-  %arrayidx5 = getelementptr inbounds float, float* %Y, i64 %i
-  %1 = load float, float* %arrayidx5, align 4
+  store float %conv1, ptr %arrayidx, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %Y, i64 %i
+  %1 = load float, ptr %arrayidx5, align 4
   %conv2 = fpext float %1 to double, !dbg !11
   %mul2 = fmul double %conv2, %fac
   %conv3 = fptrunc double %mul2 to float
-  store float %conv3, float* %arrayidx5, align 4
+  store float %conv3, ptr %arrayidx5, align 4
   %inc = add nuw nsw i64 %i, 1
   %exitcond.not = icmp eq i64 %inc, %N
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/llvm/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
index b47231fa02093..659dc62e7ab58 100644
--- a/llvm/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
+++ b/llvm/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
@@ -15,10 +15,10 @@ entry:
   %n = alloca i32, align 4
   %k7 = alloca i32, align 4
   %nf = alloca i32, align 4
-  %0 = load i32, i32* %k7, align 4
+  %0 = load i32, ptr %k7, align 4
   %.neg1 = sub i32 0, %0
-  %n.promoted = load i32, i32* %n, align 4
-  %nf.promoted = load i32, i32* %nf, align 4
+  %n.promoted = load i32, ptr %n, align 4
+  %nf.promoted = load i32, ptr %nf, align 4
   br label %for.body
 
 for.body:
@@ -36,6 +36,6 @@ for.body:
 
 for.end:
   %add5.lcssa = phi i32 [ %add5, %for.body ]
-  store i32 %add5.lcssa, i32* %n, align 4
+  store i32 %add5.lcssa, ptr %n, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/LoopVectorize/multiple-exits-versioning.ll b/llvm/test/Transforms/LoopVectorize/multiple-exits-versioning.ll
index 6b2d72c0fb2a3..dd48b0e4416bd 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-exits-versioning.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-exits-versioning.ll
@@ -5,12 +5,12 @@
 
 ; Multiple branches exiting the loop to a unique exit block. The loop should
 ; be vectorized with versioning.
-define void @multiple_exits_unique_exit_block(i32* %A, i32* %B, i64 %N) {
+define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i64 %N) {
 ; CHECK-LABEL: @multiple_exits_unique_exit_block
 ; CHECK:       vector.memcheck:
 ; CHECK-LABEL: vector.body:
-; CHECK:         %wide.load = load <2 x i32>, <2 x i32>* {{.*}}, align 4
-; CHECK:         store <2 x i32> %wide.load, <2 x i32>* {{.*}}, align 4
+; CHECK:         %wide.load = load <2 x i32>, ptr {{.*}}, align 4
+; CHECK:         store <2 x i32> %wide.load, ptr {{.*}}, align 4
 ; CHECK:         br
 ;
 entry:
@@ -22,10 +22,10 @@ loop.header:
   br i1 %cond.0, label %exit, label %for.body
 
 for.body:
-  %A.gep = getelementptr inbounds i32, i32* %A, i64 %iv
-  %lv = load i32, i32* %A.gep, align 4
-  %B.gep = getelementptr inbounds i32, i32* %B, i64 %iv
-  store i32 %lv, i32* %B.gep, align 4
+  %A.gep = getelementptr inbounds i32, ptr %A, i64 %iv
+  %lv = load i32, ptr %A.gep, align 4
+  %B.gep = getelementptr inbounds i32, ptr %B, i64 %iv
+  store i32 %lv, ptr %B.gep, align 4
   %iv.next = add nuw i64 %iv, 1
   %cond.1 = icmp ult i64 %iv.next, 1000
   br i1 %cond.1, label %loop.header, label %exit
@@ -36,7 +36,7 @@ exit:
 
 
 ; Multiple branches exiting the loop to different blocks. Currently this is not supported.
-define i32 @multiple_exits_multiple_exit_blocks(i32* %A, i32* %B, i64 %N) {
+define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i64 %N) {
 ; CHECK-LABEL: @multiple_exits_multiple_exit_blocks
 ; CHECK-NEXT:    entry:
 ; CHECK:           br label %loop.header
@@ -51,10 +51,10 @@ loop.header:
   br i1 %cond.0, label %exit.0, label %for.body
 
 for.body:
-  %A.gep = getelementptr inbounds i32, i32* %A, i64 %iv
-  %lv = load i32, i32* %A.gep, align 4
-  %B.gep = getelementptr inbounds i32, i32* %B, i64 %iv
-  store i32 %lv, i32* %B.gep, align 4
+  %A.gep = getelementptr inbounds i32, ptr %A, i64 %iv
+  %lv = load i32, ptr %A.gep, align 4
+  %B.gep = getelementptr inbounds i32, ptr %B, i64 %iv
+  store i32 %lv, ptr %B.gep, align 4
   %iv.next = add nuw i64 %iv, 1
   %cond.1 = icmp ult i64 %iv.next, 1000
   br i1 %cond.1, label %loop.header, label %exit.1

diff --git a/llvm/test/Transforms/LoopVectorize/no_array_bounds.ll b/llvm/test/Transforms/LoopVectorize/no_array_bounds.ll
index 34162c14bf318..a280c7121d2b0 100644
--- a/llvm/test/Transforms/LoopVectorize/no_array_bounds.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_array_bounds.ll
@@ -17,7 +17,7 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Function Attrs: nounwind ssp uwtable
-define void @_Z4testPiS_i(i32* nocapture %A, i32* nocapture %B, i32 %number) #0 !dbg !4 {
+define void @_Z4testPiS_i(ptr nocapture %A, ptr nocapture %B, i32 %number) #0 !dbg !4 {
 entry:
   %cmp25 = icmp sgt i32 %number, 0, !dbg !10
   br i1 %cmp25, label %for.body.preheader, label %for.end15, !dbg !10, !llvm.loop !12
@@ -33,13 +33,13 @@ for.body7.preheader:                              ; preds = %for.cond5.preheader
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv27 = phi i64 [ %indvars.iv.next28, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv27, !dbg !14
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !14, !tbaa !22
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv27, !dbg !14
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !14, !tbaa !22
   %idxprom1 = sext i32 %0 to i64, !dbg !14
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !14
-  %1 = load i32, i32* %arrayidx2, align 4, !dbg !14, !tbaa !22
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %idxprom1, !dbg !14
+  %1 = load i32, ptr %arrayidx2, align 4, !dbg !14, !tbaa !22
   %inc = add nsw i32 %1, 1, !dbg !14
-  store i32 %inc, i32* %arrayidx2, align 4, !dbg !14, !tbaa !22
+  store i32 %inc, ptr %arrayidx2, align 4, !dbg !14, !tbaa !22
   %indvars.iv.next28 = add nuw nsw i64 %indvars.iv27, 1, !dbg !10
   %lftr.wideiv29 = trunc i64 %indvars.iv.next28 to i32, !dbg !10
   %exitcond30 = icmp eq i32 %lftr.wideiv29, %number, !dbg !10
@@ -47,13 +47,13 @@ for.body:                                         ; preds = %for.body.preheader,
 
 for.body7:                                        ; preds = %for.body7.preheader, %for.body7
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body7 ], [ 0, %for.body7.preheader ]
-  %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !20
-  %2 = load i32, i32* %arrayidx9, align 4, !dbg !20, !tbaa !22
+  %arrayidx9 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !20
+  %2 = load i32, ptr %arrayidx9, align 4, !dbg !20, !tbaa !22
   %idxprom10 = sext i32 %2 to i64, !dbg !20
-  %arrayidx11 = getelementptr inbounds i32, i32* %B, i64 %idxprom10, !dbg !20
-  %3 = load i32, i32* %arrayidx11, align 4, !dbg !20, !tbaa !22
+  %arrayidx11 = getelementptr inbounds i32, ptr %B, i64 %idxprom10, !dbg !20
+  %3 = load i32, ptr %arrayidx11, align 4, !dbg !20, !tbaa !22
   %inc12 = add nsw i32 %3, 1, !dbg !20
-  store i32 %inc12, i32* %arrayidx11, align 4, !dbg !20, !tbaa !22
+  store i32 %inc12, ptr %arrayidx11, align 4, !dbg !20, !tbaa !22
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !16
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !16
   %exitcond = icmp eq i32 %lftr.wideiv, %number, !dbg !16

diff --git a/llvm/test/Transforms/LoopVectorize/no_array_bounds_scalable.ll b/llvm/test/Transforms/LoopVectorize/no_array_bounds_scalable.ll
index f2bb09d5bc497..03918a103b214 100644
--- a/llvm/test/Transforms/LoopVectorize/no_array_bounds_scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_array_bounds_scalable.ll
@@ -9,7 +9,7 @@
 ;  }
 
 ; CHECK: warning: <unknown>:0:0: loop not interleaved: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
-define dso_local void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %N) {
+define dso_local void @foo(ptr nocapture %A, ptr nocapture readonly %B, i32 %N) {
 entry:
   %cmp7 = icmp sgt i32 %N, 0
   br i1 %cmp7, label %for.body.preheader, label %for.end
@@ -20,13 +20,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %idxprom1 = sext i32 %0 to i64
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %idxprom1
+  %1 = load i32, ptr %arrayidx2, align 4
   %inc = add nsw i32 %1, 1
-  store i32 %inc, i32* %arrayidx2, align 4
+  store i32 %inc, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -36,7 +36,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 ; CHECK: warning: <unknown>:0:0: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
-define dso_local void @foo2(i32* nocapture %A, i32* nocapture readonly %B, i32 %N) {
+define dso_local void @foo2(ptr nocapture %A, ptr nocapture readonly %B, i32 %N) {
 entry:
   %cmp7 = icmp sgt i32 %N, 0
   br i1 %cmp7, label %for.body.preheader, label %for.end
@@ -47,13 +47,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %idxprom1 = sext i32 %0 to i64
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %idxprom1
+  %1 = load i32, ptr %arrayidx2, align 4
   %inc = add nsw i32 %1, 1
-  store i32 %inc, i32* %arrayidx2, align 4
+  store i32 %inc, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !3

diff --git a/llvm/test/Transforms/LoopVectorize/no_idiv_reduction.ll b/llvm/test/Transforms/LoopVectorize/no_idiv_reduction.ll
index c44844fa5d495..b661a9a773b80 100644
--- a/llvm/test/Transforms/LoopVectorize/no_idiv_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_idiv_reduction.ll
@@ -11,8 +11,8 @@ for.body:
   ; CHECK-NOT: sdiv <2 x i32>
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %r.05 = phi i32 [ 80, %entry ], [ %div, %for.body ]
-  %arrayidx = getelementptr inbounds [128 x i32], [128 x i32]* @a, i64 0, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [128 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %div = sdiv i32 %r.05, %0
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32

diff --git a/llvm/test/Transforms/LoopVectorize/no_int_induction.ll b/llvm/test/Transforms/LoopVectorize/no_int_induction.ll
index 73080bc8514da..efb872aa2de16 100644
--- a/llvm/test/Transforms/LoopVectorize/no_int_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_int_induction.ll
@@ -12,19 +12,19 @@ target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-
 ;CHECK: load <4 x i32>
 ;CHECK: add <4 x i32>
 ;CHECK: ret i32
-define i32 @sum_array(i32* %A, i32 %n) nounwind uwtable readonly noinline ssp {
+define i32 @sum_array(ptr %A, i32 %n) nounwind uwtable readonly noinline ssp {
   %1 = sext i32 %n to i64
-  %2 = getelementptr inbounds i32, i32* %A, i64 %1
+  %2 = getelementptr inbounds i32, ptr %A, i64 %1
   %3 = icmp eq i32 %n, 0
   br i1 %3, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
 
 .lr.ph.i:                                         ; preds = %0, %.lr.ph.i
-  %.03.i = phi i32* [ %6, %.lr.ph.i ], [ %A, %0 ]
+  %.03.i = phi ptr [ %6, %.lr.ph.i ], [ %A, %0 ]
   %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ]
-  %4 = load i32, i32* %.03.i, align 4
+  %4 = load i32, ptr %.03.i, align 4
   %5 = add nsw i32 %4, %.012.i
-  %6 = getelementptr inbounds i32, i32* %.03.i, i64 1
-  %7 = icmp eq i32* %6, %2
+  %6 = getelementptr inbounds i32, ptr %.03.i, i64 1
+  %7 = icmp eq ptr %6, %2
   br i1 %7, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
 
 _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %.lr.ph.i, %0
@@ -39,19 +39,19 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %.lr.ph.i, %0
 ;CHECK: load <4 x i32>
 ;CHECK: add <4 x i32>
 ;CHECK: ret i32
-define i32 @sum_array_as1(i32 addrspace(1)* %A, i32 %n) nounwind uwtable readonly noinline ssp {
+define i32 @sum_array_as1(ptr addrspace(1) %A, i32 %n) nounwind uwtable readonly noinline ssp {
   %1 = sext i32 %n to i64
-  %2 = getelementptr inbounds i32, i32 addrspace(1)* %A, i64 %1
+  %2 = getelementptr inbounds i32, ptr addrspace(1) %A, i64 %1
   %3 = icmp eq i32 %n, 0
   br i1 %3, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
 
 .lr.ph.i:                                         ; preds = %0, %.lr.ph.i
-  %.03.i = phi i32 addrspace(1)* [ %6, %.lr.ph.i ], [ %A, %0 ]
+  %.03.i = phi ptr addrspace(1) [ %6, %.lr.ph.i ], [ %A, %0 ]
   %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ]
-  %4 = load i32, i32 addrspace(1)* %.03.i, align 4
+  %4 = load i32, ptr addrspace(1) %.03.i, align 4
   %5 = add nsw i32 %4, %.012.i
-  %6 = getelementptr inbounds i32, i32 addrspace(1)* %.03.i, i64 1
-  %7 = icmp eq i32 addrspace(1)* %6, %2
+  %6 = getelementptr inbounds i32, ptr addrspace(1) %.03.i, i64 1
+  %7 = icmp eq ptr addrspace(1) %6, %2
   br i1 %7, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
 
 _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %.lr.ph.i, %0

diff --git a/llvm/test/Transforms/LoopVectorize/no_switch.ll b/llvm/test/Transforms/LoopVectorize/no_switch.ll
index 5bcf5ba5b3566..c62826f9554e6 100644
--- a/llvm/test/Transforms/LoopVectorize/no_switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_switch.ll
@@ -19,7 +19,7 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
+define void @_Z11test_switchPii(ptr nocapture %A, i32 %Length) #0 !dbg !4 {
 entry:
   %cmp18 = icmp sgt i32 %Length, 0, !dbg !10
   br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12
@@ -29,8 +29,8 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !14
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !14, !tbaa !16
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !14
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !14, !tbaa !16
   switch i32 %0, label %for.inc [
     i32 0, label %sw.bb
     i32 1, label %sw.bb3
@@ -43,12 +43,12 @@ sw.bb:                                            ; preds = %for.body
 
 sw.bb3:                                           ; preds = %for.body
   %2 = trunc i64 %indvars.iv to i32, !dbg !23
-  store i32 %2, i32* %arrayidx, align 4, !dbg !23, !tbaa !16
+  store i32 %2, ptr %arrayidx, align 4, !dbg !23, !tbaa !16
   br label %for.inc, !dbg !23
 
 for.inc:                                          ; preds = %sw.bb3, %for.body, %sw.bb
   %storemerge = phi i32 [ %mul, %sw.bb ], [ 0, %for.body ], [ 0, %sw.bb3 ]
-  store i32 %storemerge, i32* %arrayidx, align 4, !dbg !20, !tbaa !16
+  store i32 %storemerge, ptr %arrayidx, align 4, !dbg !20, !tbaa !16
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
   %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !10

diff --git a/llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll b/llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll
index 3aa6edc527a07..d9ebaebb16a0c 100644
--- a/llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll
@@ -21,7 +21,7 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
+define void @_Z11test_switchPii(ptr nocapture %A, i32 %Length) #0 !dbg !4 {
 entry:
   %cmp18 = icmp sgt i32 %Length, 0, !dbg !10
   br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12
@@ -31,8 +31,8 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !14
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !14, !tbaa !16
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !14
+  %0 = load i32, ptr %arrayidx, align 4, !dbg !14, !tbaa !16
   switch i32 %0, label %for.inc [
     i32 0, label %sw.bb
     i32 1, label %sw.bb3
@@ -45,12 +45,12 @@ sw.bb:                                            ; preds = %for.body
 
 sw.bb3:                                           ; preds = %for.body
   %2 = trunc i64 %indvars.iv to i32, !dbg !23
-  store i32 %2, i32* %arrayidx, align 4, !dbg !23, !tbaa !16
+  store i32 %2, ptr %arrayidx, align 4, !dbg !23, !tbaa !16
   br label %for.inc, !dbg !23
 
 for.inc:                                          ; preds = %sw.bb3, %for.body, %sw.bb
   %storemerge = phi i32 [ %mul, %sw.bb ], [ 0, %for.body ], [ 0, %sw.bb3 ]
-  store i32 %storemerge, i32* %arrayidx, align 4, !dbg !20, !tbaa !16
+  store i32 %storemerge, ptr %arrayidx, align 4, !dbg !20, !tbaa !16
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
   %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !10

diff --git a/llvm/test/Transforms/LoopVectorize/noalias-md-licm.ll b/llvm/test/Transforms/LoopVectorize/noalias-md-licm.ll
index be27c4a947637..c9c45e1a671c7 100644
--- a/llvm/test/Transforms/LoopVectorize/noalias-md-licm.ll
+++ b/llvm/test/Transforms/LoopVectorize/noalias-md-licm.ll
@@ -14,36 +14,36 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; LICM'ed from the inner loop.
 
 
-define void @f(i32* %a, i32* %b, i32* %c) {
+define void @f(ptr %a, ptr %b, ptr %c) {
 entry:
   br label %outer
 
 outer:
   %i.2 = phi i64 [ 0, %entry ], [ %i, %inner.end ]
-  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %i.2
+  %arrayidxB = getelementptr inbounds i32, ptr %b, i64 %i.2
   br label %inner.ph
 
 inner.ph:
 ; CHECK: vector.ph:
-; CHECK: load i32, i32* %arrayidxB,
+; CHECK: load i32, ptr %arrayidxB,
 ; CHECK: br label %vector.body
   br label %inner
 
 inner:
   %j.2 = phi i64 [ 0, %inner.ph ], [ %j, %inner ]
 
-  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %j.2
-  %loadA = load i32, i32* %arrayidxA, align 4
+  %arrayidxA = getelementptr inbounds i32, ptr %a, i64 %j.2
+  %loadA = load i32, ptr %arrayidxA, align 4
 
-  %loadB = load i32, i32* %arrayidxB, align 4
+  %loadB = load i32, ptr %arrayidxB, align 4
 
-  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %j.2
-  %loadC = load i32, i32* %arrayidxC, align 4
+  %arrayidxC = getelementptr inbounds i32, ptr %c, i64 %j.2
+  %loadC = load i32, ptr %arrayidxC, align 4
 
   %add = add nuw i32 %loadA, %loadB
   %add2 = add nuw i32 %add, %loadC
 
-  store i32 %add2, i32* %arrayidxA, align 4
+  store i32 %add2, ptr %arrayidxA, align 4
 
   %j = add nuw nsw i64 %j.2, 1
   %cond1 = icmp eq i64 %j, 20

diff --git a/llvm/test/Transforms/LoopVectorize/noalias-md.ll b/llvm/test/Transforms/LoopVectorize/noalias-md.ll
index cf3a2a587c492..111aafc620434 100644
--- a/llvm/test/Transforms/LoopVectorize/noalias-md.ll
+++ b/llvm/test/Transforms/LoopVectorize/noalias-md.ll
@@ -22,7 +22,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; store to C[i] can be DSE'd.
 
 
-define void @f(i32* %a, i32* %b, i32* %c) {
+define void @f(ptr %a, ptr %b, ptr %c) {
 entry:
   br label %for.body
 
@@ -31,30 +31,30 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
 
-  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+  %arrayidxA = getelementptr inbounds i32, ptr %a, i64 %ind
 ; Scope 1
 ; LV: = load {{.*}} !alias.scope !0
-  %loadA = load i32, i32* %arrayidxA, align 4
+  %loadA = load i32, ptr %arrayidxA, align 4
 
   %add = add nuw i32 %loadA, 2
 
-  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+  %arrayidxC = getelementptr inbounds i32, ptr %c, i64 %ind
 ; Noalias with scope 1 and 6
 ; LV: store {{.*}} !alias.scope !3, !noalias !5
 ; DSE-NOT: store
-  store i32 %add, i32* %arrayidxC, align 4
+  store i32 %add, ptr %arrayidxC, align 4
 
-  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+  %arrayidxB = getelementptr inbounds i32, ptr %b, i64 %ind
 ; Scope 6
 ; LV: = load {{.*}} !alias.scope !7
-  %loadB = load i32, i32* %arrayidxB, align 4
+  %loadB = load i32, ptr %arrayidxB, align 4
 
   %add2 = add nuw i32 %add, %loadB
 
 ; Noalias with scope 1 and 6
 ; LV: store {{.*}} !alias.scope !3, !noalias !5
 ; DSE: store
-  store i32 %add2, i32* %arrayidxC, align 4
+  store i32 %add2, ptr %arrayidxC, align 4
 
   %inc = add nuw nsw i64 %ind, 1
   %exitcond = icmp eq i64 %inc, 20

diff --git a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll
index 849cd9350ee45..a8e5eb2a19dad 100644
--- a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll
+++ b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2  -S | FileCheck %s
 
-define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) {
+define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
 entry:
   br label %for.body
 
@@ -15,13 +15,13 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp1 = fcmp ogt float %0, 1.000000e+02
   tail call void @llvm.experimental.noalias.scope.decl(metadata !0)
   %add = fadd float %0, 1.000000e+00
-  %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv, 1599
   br i1 %exitcond, label %for.end, label %for.body
@@ -32,18 +32,17 @@ for.end:                                          ; preds = %for.body
 
 declare void @llvm.experimental.noalias.scope.decl(metadata)
 
-%struct.data = type { float*, float* }
+%struct.data = type { ptr, ptr }
 
-define void @test2(%struct.data* nocapture readonly %d) {
+define void @test2(ptr nocapture readonly %d) {
 entry:
-  %b = getelementptr inbounds %struct.data, %struct.data* %d, i64 0, i32 1
-  %0 = load float*, float** %b, align 8
-  %ptrint = ptrtoint float* %0 to i64
+  %b = getelementptr inbounds %struct.data, ptr %d, i64 0, i32 1
+  %0 = load ptr, ptr %b, align 8
+  %ptrint = ptrtoint ptr %0 to i64
   %maskedptr = and i64 %ptrint, 31
   %maskcond = icmp eq i64 %maskedptr, 0
-  %a = getelementptr inbounds %struct.data, %struct.data* %d, i64 0, i32 0
-  %1 = load float*, float** %a, align 8
-  %ptrint2 = ptrtoint float* %1 to i64
+  %1 = load ptr, ptr %d, align 8
+  %ptrint2 = ptrtoint ptr %1 to i64
   %maskedptr3 = and i64 %ptrint2, 31
   %maskcond4 = icmp eq i64 %maskedptr3, 0
   br label %for.body
@@ -62,12 +61,12 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   tail call void @llvm.experimental.noalias.scope.decl(metadata !0)
-  %arrayidx = getelementptr inbounds float, float* %0, i64 %indvars.iv
-  %2 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %0, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
   %add = fadd float %2, 1.000000e+00
   tail call void @llvm.experimental.noalias.scope.decl(metadata !4)
-  %arrayidx5 = getelementptr inbounds float, float* %1, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %1, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv, 1599
   br i1 %exitcond, label %for.end, label %for.body
@@ -76,7 +75,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-define void @predicated_noalias_scope_decl(float* noalias nocapture readonly %a, float* noalias nocapture %b, i32 %n) {
+define void @predicated_noalias_scope_decl(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i32 %n) {
 
 ; Check that the vector.body still contains a llvm.experimental.noalias.scope.decl
 
@@ -117,11 +116,11 @@ if.else:                                          ; preds = %for.body
 
 if.end5:                                          ; preds = %for.body, %if.else
   %x.0 = phi float [ 4.200000e+01, %if.else ], [ 2.300000e+01, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx, align 4
   %mul = fmul float %x.0, %1
-  %arrayidx7 = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  store float %mul, float* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  store float %mul, ptr %arrayidx7, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %cmp = icmp eq i64 %indvars.iv.next, %0
   br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/nofloat-report.ll b/llvm/test/Transforms/LoopVectorize/nofloat-report.ll
index 7026fd5a9ee15..a61e0f6643516 100644
--- a/llvm/test/Transforms/LoopVectorize/nofloat-report.ll
+++ b/llvm/test/Transforms/LoopVectorize/nofloat-report.ll
@@ -14,9 +14,9 @@ define void @example_nofloat() noimplicitfloat { ;           <--------- "noimpli
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %2 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
   %3 = trunc i64 %indvars.iv to i32
-  store i32 %3, i32* %2, align 4
+  store i32 %3, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024

diff --git a/llvm/test/Transforms/LoopVectorize/nofloat.ll b/llvm/test/Transforms/LoopVectorize/nofloat.ll
index 6b4d11912224f..08c42f3fbafc5 100644
--- a/llvm/test/Transforms/LoopVectorize/nofloat.ll
+++ b/llvm/test/Transforms/LoopVectorize/nofloat.ll
@@ -14,9 +14,9 @@ define void @example12() noimplicitfloat { ;           <--------- "noimplicitflo
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %2 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
   %3 = trunc i64 %indvars.iv to i32
-  store i32 %3, i32* %2, align 4
+  store i32 %3, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 1024

diff --git a/llvm/test/Transforms/LoopVectorize/non-const-n.ll b/llvm/test/Transforms/LoopVectorize/non-const-n.ll
index d2033ad6080cb..a7d148af4f74f 100644
--- a/llvm/test/Transforms/LoopVectorize/non-const-n.ll
+++ b/llvm/test/Transforms/LoopVectorize/non-const-n.ll
@@ -19,13 +19,13 @@ define void @example1(i32 %n) nounwind uwtable ssp {
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %5, %3
-  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %6, i32* %7, align 4
+  %7 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %6, ptr %7, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n4

diff --git a/llvm/test/Transforms/LoopVectorize/nontemporal.ll b/llvm/test/Transforms/LoopVectorize/nontemporal.ll
index 55a0fc3ad76be..528c4b3ff2dcd 100644
--- a/llvm/test/Transforms/LoopVectorize/nontemporal.ll
+++ b/llvm/test/Transforms/LoopVectorize/nontemporal.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 ; CHECK-LABEL: @foo(
-define void @foo(float* noalias %a, float* noalias %b, float* noalias %c, i32 %N) {
+define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i32 %N) {
 entry:
   %cmp.4 = icmp sgt i32 %N, 0
   br i1 %cmp.4, label %for.body.preheader, label %for.end
@@ -16,19 +16,19 @@ for.body:                                         ; preds = %for.body.preheader,
 
 ; Check that we don't lose !nontemporal hint when attempting vectorizing of loads.
 ; CHECK: load {{.*}} align 4, !nontemporal !0
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4, !nontemporal !0
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4, !nontemporal !0
 
 ; Check that we don't introduce !nontemporal hint when the original scalar loads didn't have it.
 ; CHECK: load {{.*}} align 4{{$}}
-  %arrayidx2 = getelementptr inbounds float, float* %c, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %c, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %add = fadd float %0, %1
 
 ; Check that we don't lose !nontemporal hint when attempting vectorizing of stores.
 ; CHECK: store {{.*}} align 4, !nontemporal !0
-  %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %add, float* %arrayidx4, align 4, !nontemporal !0
+  %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %add, ptr %arrayidx4, align 4, !nontemporal !0
 
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32

diff --git a/llvm/test/Transforms/LoopVectorize/nounroll.ll b/llvm/test/Transforms/LoopVectorize/nounroll.ll
index 020c0ff36e13d..7e4decb4dedb0 100644
--- a/llvm/test/Transforms/LoopVectorize/nounroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/nounroll.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512"
 
 ; CHECK: LV: Checking a loop in 'f1'
 ; CHECK: LV: Loop hints: force=? width=0 interleave=1
-define dso_local void @f1(i32 signext %n, i32* %A) {
+define dso_local void @f1(i32 signext %n, ptr %A) {
 entry:
   %cmp1 = icmp sgt i32 %n, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
@@ -16,9 +16,9 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
   %0 = trunc i64 %indvars.iv to i32
-  store i32 %0, i32* %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !1
@@ -32,7 +32,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 
 ; CHECK: LV: Checking a loop in 'f2'
 ; CHECK: LV: Loop hints: force=? width=0 interleave=4
-define dso_local void @f2(i32 signext %n, i32* %A) {
+define dso_local void @f2(i32 signext %n, ptr %A) {
 entry:
   %cmp1 = icmp sgt i32 %n, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
@@ -43,9 +43,9 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
   %0 = trunc i64 %indvars.iv to i32
-  store i32 %0, i32* %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !3
@@ -59,7 +59,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 
 ; CHECK: LV: Checking a loop in 'f3'
 ; CHECK: LV: Loop hints: force=? width=0 interleave=1
-define dso_local void @f3(i32 signext %n, i32* %A) {
+define dso_local void @f3(i32 signext %n, ptr %A) {
 entry:
   %cmp1 = icmp sgt i32 %n, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
@@ -70,9 +70,9 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
   %0 = trunc i64 %indvars.iv to i32
-  store i32 %0, i32* %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !6

diff --git a/llvm/test/Transforms/LoopVectorize/novect-lcssa-cfg-invalidation.ll b/llvm/test/Transforms/LoopVectorize/novect-lcssa-cfg-invalidation.ll
index 00521e2980c9f..24bcfe079ae80 100644
--- a/llvm/test/Transforms/LoopVectorize/novect-lcssa-cfg-invalidation.ll
+++ b/llvm/test/Transforms/LoopVectorize/novect-lcssa-cfg-invalidation.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; Checks what analyses are invalidated after Loop Vectorization when no actual
 ; vectorization happens, and the only change LV makes is LCSSA formation.
 
-define i32 @novect(i32* %p) {
+define i32 @novect(ptr %p) {
 
 ; CHECK:           Running pass: LoopVectorizePass on novect
 ; CHECK:           Invalidating analysis: ScalarEvolutionAnalysis on novect
@@ -19,7 +19,7 @@ define i32 @novect(i32* %p) {
 ; CHECK:             br label %middle
 ; CHECK:           middle:
 ; CHECK:             %iv = phi i32 [ 0, %entry ], [ %iv.next, %middle ]
-; CHECK:             %x = load volatile i32, i32* %p
+; CHECK:             %x = load volatile i32, ptr %p
 ; CHECK:             %iv.next = add i32 %iv, 1
 ; CHECK:             %cond = icmp slt i32 %iv, 1000
 ; CHECK:             br i1 %cond, label %exit, label %middle
@@ -32,7 +32,7 @@ entry:
 
 middle:
   %iv = phi i32 [0, %entry], [%iv.next, %middle]
-  %x = load volatile i32, i32* %p
+  %x = load volatile i32, ptr %p
   %iv.next = add i32 %iv, 1
   %cond = icmp slt i32 %iv, 1000
   br i1 %cond, label %exit, label %middle

diff --git a/llvm/test/Transforms/LoopVectorize/nuw.ll b/llvm/test/Transforms/LoopVectorize/nuw.ll
index e6476a8059736..c3b2d0766ea4e 100644
--- a/llvm/test/Transforms/LoopVectorize/nuw.ll
+++ b/llvm/test/Transforms/LoopVectorize/nuw.ll
@@ -2,7 +2,7 @@
 
 ; Fixes PR43828
 
-define void @test(i32* %B) {
+define void @test(ptr %B) {
 ; CHECK-LABEL: @test(
 ; CHECK:       vector.body:
 ; CHECK-COUNT-2: sub <4 x i32>
@@ -23,7 +23,7 @@ inner_loop:
 
 outer_tail:
   %3 = phi i32 [ %0, %inner_loop ]
-  store atomic i32 %3, i32 * %B unordered, align 8
+  store atomic i32 %3, ptr %B unordered, align 8
   %4 = add i32 %local_4, 1
   %5 = icmp slt i32 %4, 6
   br i1 %5, label %outer_loop, label %exit
@@ -32,7 +32,7 @@ exit:
   ret void
 }
 
-define i32 @multi-instr(i32* noalias nocapture %A, i32* noalias nocapture %B, i32 %inc) {
+define i32 @multi-instr(ptr noalias nocapture %A, ptr noalias nocapture %B, i32 %inc) {
 ; CHECK-LABEL: @multi-instr(
 ; CHECK:       vector.body:
 ; CHECK-COUNT-4: add <4 x i32>
@@ -42,10 +42,10 @@ entry:
 loop:
   %iv = phi i32 [0, %entry], [%iv_inc, %loop]
   %redu = phi i32 [0, %entry], [%3, %loop]
-  %gepa = getelementptr inbounds i32, i32* %A, i32 %iv
-  %gepb = getelementptr inbounds i32, i32* %B, i32 %iv
-  %0 = load i32, i32* %gepa
-  %1 = load i32, i32* %gepb
+  %gepa = getelementptr inbounds i32, ptr %A, i32 %iv
+  %gepb = getelementptr inbounds i32, ptr %B, i32 %iv
+  %0 = load i32, ptr %gepa
+  %1 = load i32, ptr %gepb
   %2 = add nuw nsw i32 %redu, %0
   %3 = add nuw nsw i32 %2, %1
   %iv_inc = add nuw nsw i32 %iv, 1

diff --git a/llvm/test/Transforms/LoopVectorize/opt.ll b/llvm/test/Transforms/LoopVectorize/opt.ll
index c8f73029fcc98..37b6ed4b010d1 100644
--- a/llvm/test/Transforms/LoopVectorize/opt.ll
+++ b/llvm/test/Transforms/LoopVectorize/opt.ll
@@ -8,15 +8,15 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; LOOPVEC:       add <2 x i32>
 ; NOLOOPVEC-NOT: add <2 x i32>
 
-define i32 @vect(i32* %a) {
+define i32 @vect(ptr %a) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add nsw i32 %0, %red.05
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 255

diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll
index a7affd1116e6e..56d27ffcbddfd 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll
@@ -7,7 +7,7 @@ target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
 ; CHECK: LV: Checking a loop in 'f2'
 ; CHECK: LEV: Unable to vectorize epilogue because the loop is not a supported candidate.
 
-define signext i32 @f2(i8* noalias %A, i32 signext %n) {
+define signext i32 @f2(ptr noalias %A, i32 signext %n) {
 entry:
   %cmp1 = icmp sgt i32 %n, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
@@ -18,11 +18,11 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1
   %add = add i8 %0, 1
-  %arrayidx3 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv
-  store i8 %add, i8* %arrayidx3, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv
+  store i8 %add, ptr %arrayidx3, align 1
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.body, label %for.end.loopexit
@@ -41,7 +41,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; CHECK: LV: Checking a loop in 'f3'
 ; CHECK: LEV: Unable to vectorize epilogue because the loop is not a supported candidate.
 
-define void @f3(i8* noalias %A, i32 signext %n) {
+define void @f3(ptr noalias %A, i32 signext %n) {
 entry:
   %cmp1 = icmp sgt i32 %n, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
@@ -54,8 +54,8 @@ for.body:                                         ; preds = %for.body.preheader,
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
   %0 = trunc i64 %indvars.iv to i32
   %conv = trunc i32 %0 to i8
-  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv
-  store i8 %conv, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %indvars.iv
+  store i8 %conv, ptr %arrayidx, align 1
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %for.body, label %for.end.loopexit

diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
index ce8e25afea897..336bba76000c3 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
@@ -8,14 +8,14 @@ target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
 ; CHECK: LEV: Epilogue vectorization factor is forced.
 ; CHECK: Epilogue Loop VF:2, Epilogue Loop UF:1
 
-define void @f1(i8* %A) {
+define void @f1(ptr %A) {
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %iv
-  store i8 1, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %iv
+  store i8 1, ptr %arrayidx, align 1
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp ne i64 %iv.next, 1024
   br i1 %exitcond, label %for.body, label %exit, !llvm.loop !0

diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll
index 9f6ee7e0d7bf5..1c40df3e3cbe8 100644
--- a/llvm/test/Transforms/LoopVectorize/optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/optsize.ll
@@ -19,11 +19,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %i.08, 202
   br i1 %exitcond, label %for.end, label %for.body
@@ -45,11 +45,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %i.08, 202
   br i1 %exitcond, label %for.end, label %for.body
@@ -71,11 +71,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %i.08, 202
   br i1 %exitcond, label %for.end, label %for.body
@@ -100,8 +100,8 @@ define void @pr43371() optsize {
 ; the non-consecutive loads/stores can be scalarized:
 ;
 ; CHECK: vector.body:
-; CHECK: store i16 0, i16* %{{.*}}, align 1
-; CHECK: store i16 0, i16* %{{.*}}, align 1
+; CHECK: store i16 0, ptr %{{.*}}, align 1
+; CHECK: store i16 0, ptr %{{.*}}, align 1
 ; CHECK: br i1 {{.*}}, label %vector.body
 ;
 entry:
@@ -114,8 +114,8 @@ for.body29:
   %i24.0170 = phi i16 [ 0, %entry], [ %inc37, %for.body29]
   %add33 = add i16 undef, %i24.0170
   %idxprom34 = zext i16 %add33 to i32
-  %arrayidx35 = getelementptr [2592 x i16], [2592 x i16] * @cm_array, i32 0, i32 %idxprom34
-  store i16 0, i16 * %arrayidx35, align 1
+  %arrayidx35 = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 %idxprom34
+  store i16 0, ptr %arrayidx35, align 1
   %inc37 = add i16 %i24.0170, 1
   %cmp26 = icmp ult i16 %inc37, 756
   br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
@@ -132,8 +132,8 @@ define void @pr43371_pgso() !prof !14 {
 ; the non-consecutive loads/stores can be scalarized:
 ;
 ; CHECK: vector.body:
-; CHECK: store i16 0, i16* %{{.*}}, align 1
-; CHECK: store i16 0, i16* %{{.*}}, align 1
+; CHECK: store i16 0, ptr %{{.*}}, align 1
+; CHECK: store i16 0, ptr %{{.*}}, align 1
 ; CHECK: br i1 {{.*}}, label %vector.body
 ;
 entry:
@@ -146,8 +146,8 @@ for.body29:
   %i24.0170 = phi i16 [ 0, %entry], [ %inc37, %for.body29]
   %add33 = add i16 undef, %i24.0170
   %idxprom34 = zext i16 %add33 to i32
-  %arrayidx35 = getelementptr [2592 x i16], [2592 x i16] * @cm_array, i32 0, i32 %idxprom34
-  store i16 0, i16 * %arrayidx35, align 1
+  %arrayidx35 = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 %idxprom34
+  store i16 0, ptr %arrayidx35, align 1
   %inc37 = add i16 %i24.0170, 1
   %cmp26 = icmp ult i16 %inc37, 756
   br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
@@ -221,7 +221,7 @@ exit:
 ; vectorization.
 
 ; NOTE: Some assertions have been autogenerated by utils/update_test_checks.py
-define void @stride1(i16* noalias %B, i32 %BStride) optsize {
+define void @stride1(ptr noalias %B, i32 %BStride) optsize {
 ; CHECK-LABEL: @stride1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -238,16 +238,16 @@ define void @stride1(i16* noalias %B, i32 %BStride) optsize {
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i32 [[TMP3]]
-; CHECK-NEXT:    store i16 42, i16* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 [[TMP3]]
+; CHECK-NEXT:    store i16 42, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.if1:
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 [[TMP6]]
-; CHECK-NEXT:    store i16 42, i16* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP6]]
+; CHECK-NEXT:    store i16 42, ptr [[TMP7]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.continue2:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
@@ -274,8 +274,8 @@ entry:
 for.body:
   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
   %mulB = mul nsw i32 %iv, %BStride
-  %gepOfB = getelementptr inbounds i16, i16* %B, i32 %mulB
-  store i16 42, i16* %gepOfB, align 4
+  %gepOfB = getelementptr inbounds i16, ptr %B, i32 %mulB
+  store i16 42, ptr %gepOfB, align 4
   %iv.next = add nuw nsw i32 %iv, 1
   %exitcond = icmp eq i32 %iv.next, 1025
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15
@@ -286,7 +286,7 @@ for.end:
 
 ; Vectorize with versioning for unit stride for PGSO and enabled vectorization.
 ;
-define void @stride1_pgso(i16* noalias %B, i32 %BStride) !prof !14 {
+define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 {
 ; CHECK-LABEL: @stride1_pgso(
 ; CHECK: vector.body
 ;
@@ -302,8 +302,8 @@ entry:
 for.body:
   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
   %mulB = mul nsw i32 %iv, %BStride
-  %gepOfB = getelementptr inbounds i16, i16* %B, i32 %mulB
-  store i16 42, i16* %gepOfB, align 4
+  %gepOfB = getelementptr inbounds i16, ptr %B, i32 %mulB
+  store i16 42, ptr %gepOfB, align 4
   %iv.next = add nuw nsw i32 %iv, 1
   %exitcond = icmp eq i32 %iv.next, 1025
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15
@@ -328,8 +328,8 @@ entry:
 for.body:                                        ; preds = %for.body, %entry
   %l1.02 = phi i16 [ 1, %entry ], [ %inc9, %for.body ]
   %mul = mul nsw i16 %l1.02, %stride
-  %arrayidx6 = getelementptr inbounds [1 x i16], [1 x i16]* @g, i16 0, i16 %mul
-  %0 = load i16, i16* %arrayidx6, align 1
+  %arrayidx6 = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 %mul
+  %0 = load i16, ptr %arrayidx6, align 1
   %inc9 = add nuw nsw i16 %l1.02, 1
   %exitcond.not = icmp eq i16 %inc9, 16
   br i1 %exitcond.not, label %for.end, label %for.body
@@ -340,7 +340,7 @@ for.end:                                        ; preds = %for.body
 
 ; Make sure we do not crash while building the VPlan for the loop with the
 ; select below.
-define i32 @PR48142(i32* %ptr.start, i32* %ptr.end) optsize {
+define i32 @PR48142(ptr %ptr.start, ptr %ptr.end) optsize {
 ; CHECK-LABEL: PR48142
 ; CHECK-NOT: vector.body
 entry:
@@ -348,12 +348,12 @@ entry:
 
 for.body:
   %i.014 = phi i32 [ 20, %entry ], [ %cond, %for.body ]
-  %ptr.iv = phi i32* [ %ptr.start, %entry ], [ %ptr.next, %for.body ]
+  %ptr.iv = phi ptr [ %ptr.start, %entry ], [ %ptr.next, %for.body ]
   %cmp4 = icmp slt i32 %i.014, 99
   %cond = select i1 %cmp4, i32 99, i32 %i.014
-  store i32 0, i32* %ptr.iv
-  %ptr.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
-  %cmp.not = icmp eq i32* %ptr.next, %ptr.end
+  store i32 0, ptr %ptr.iv
+  %ptr.next = getelementptr inbounds i32, ptr %ptr.iv, i64 1
+  %cmp.not = icmp eq ptr %ptr.next, %ptr.end
   br i1 %cmp.not, label %exit, label %for.body
 
 exit:

diff  --git a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll
index 18accecd0e087..7707a0b42182b 100644
--- a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll
@@ -7,7 +7,7 @@
 
 ; Test from PR45958.
 
-define void @test([2000 x i32]* %src, i64 %n) {
+define void @test(ptr %src, i64 %n) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
@@ -27,10 +27,10 @@ define void @test([2000 x i32]* %src, i64 %n) {
 ; CHECK-NEXT:    br label [[LOOP_32:%.*]]
 ; CHECK:       loop.32:
 ; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, [[LOOP_2_HEADER1]] ], [ [[TMP2:%.*]], [[LOOP_32]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2000 x i32], [2000 x i32]* [[SRC:%.*]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI3]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2000 x i32], ptr [[SRC:%.*]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI3]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_GATHER]], <i32 10, i32 10, i32 10, i32 10>
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[TMP1]], <4 x i32*> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[TMP2]] = add nuw nsw <4 x i64> [[VEC_PHI3]], <i64 1, i64 1, i64 1, i64 1>
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
@@ -61,10 +61,10 @@ define void @test([2000 x i32]* %src, i64 %n) {
 ; CHECK-NEXT:    br label [[LOOP_3:%.*]]
 ; CHECK:       loop.3:
 ; CHECK-NEXT:    [[IV_3:%.*]] = phi i64 [ 0, [[LOOP_2_HEADER]] ], [ [[IV_3_NEXT:%.*]], [[LOOP_3]] ]
-; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds [2000 x i32], [2000 x i32]* [[SRC]], i64 [[IV_1]], i64 [[IV_3]]
-; CHECK-NEXT:    [[L1:%.*]] = load i32, i32* [[GEP_SRC]], align 4
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds [2000 x i32], ptr [[SRC]], i64 [[IV_1]], i64 [[IV_3]]
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[GEP_SRC]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[L1]], 10
-; CHECK-NEXT:    store i32 [[MUL]], i32* [[GEP_SRC]], align 4
+; CHECK-NEXT:    store i32 [[MUL]], ptr [[GEP_SRC]], align 4
 ; CHECK-NEXT:    [[IV_3_NEXT]] = add nuw nsw i64 [[IV_3]], 1
 ; CHECK-NEXT:    [[EC_3:%.*]] = icmp eq i64 [[IV_3_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EC_3]], label [[LOOP_2_LATCH]], label [[LOOP_3]]
@@ -92,10 +92,10 @@ loop.2.header:
 
 loop.3:
   %iv.3 = phi i64 [ 0, %loop.2.header ], [ %iv.3.next, %loop.3 ]
-  %gep.src = getelementptr inbounds [2000 x i32], [2000 x i32]* %src, i64 %iv.1, i64 %iv.3
-  %l1 = load i32, i32* %gep.src, align 4
+  %gep.src = getelementptr inbounds [2000 x i32], ptr %src, i64 %iv.1, i64 %iv.3
+  %l1 = load i32, ptr %gep.src, align 4
   %mul = mul nsw i32 %l1, 10
-  store i32 %mul, i32* %gep.src, align 4
+  store i32 %mul, ptr %gep.src, align 4
   %iv.3.next = add nuw nsw i64 %iv.3, 1
   %ec.3 = icmp eq i64 %iv.3.next, %n
   br i1 %ec.3, label %loop.2.latch, label %loop.3

diff  --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll
index 6c62e4079c6d0..f3cb2933dfbc4 100644
--- a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll
@@ -21,17 +21,17 @@
 ; CHECK-LABEL: vector.body:
 ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
 ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
-; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, <4 x i64> %[[VecInd]]
+; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]]
 ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
-; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[VecIndTr]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 ; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
 ; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]]
 ; CHECK: br label %[[InnerLoop:.+]]
 
 ; CHECK: [[InnerLoop]]:
 ; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]
-; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]
-; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[StoreVal]], <4 x i32*> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true
+; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true
 ; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1>
 ; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8>
 ; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0
@@ -53,17 +53,17 @@ entry:
 
 for.body:                                         ; preds = %for.inc8, %entry
   %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
-  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, i64 %indvars.iv21
+  %arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %indvars.iv21
   %0 = trunc i64 %indvars.iv21 to i32
-  store i32 %0, i32* %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4
   %1 = trunc i64 %indvars.iv21 to i32
   %add = add nsw i32 %1, %n
   br label %for.body3
 
 for.body3:                                        ; preds = %for.body3, %for.body
   %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
-  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
-  store i32 %add, i32* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+  store i32 %add, ptr %arrayidx7, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 8
   br i1 %exitcond, label %for.inc8, label %for.body3

diff  --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
index f00a2b86f1acb..3bb85abe24b0c 100644
--- a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
@@ -22,20 +22,20 @@
 ; CHECK-LABEL: vector.body:
 ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
 ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
-; CHECK: %[[AAddr:.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, <4 x i64> %[[VecInd]]
-; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[CSplat]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK: %[[AAddr:.*]] = getelementptr inbounds [1024 x i32], ptr @A, i64 0, <4 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[CSplat]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 ; CHECK: br i1 %[[ZeroTripChk]], label %[[InnerForPh:.*]], label %[[OuterInc:.*]]
 
 ; CHECK: [[InnerForPh]]:
-; CHECK: %[[WideAVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK: %[[WideAVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
 ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
 ; CHECK: br label %[[InnerForBody:.*]]
 
 ; CHECK: [[InnerForBody]]:
 ; CHECK: %[[InnerInd:.*]] = phi <4 x i64> [ zeroinitializer, %[[InnerForPh]] ], [ %[[InnerIndNext:.*]], %[[InnerForBody]] ]
 ; CHECK: %[[AccumPhi:.*]] = phi <4 x i32> [ %[[WideAVal]], %[[InnerForPh]] ], [ %[[AccumPhiNext:.*]], %[[InnerForBody]] ]
-; CHECK: %[[BAddr:.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, <4 x i64> %[[InnerInd]]
-; CHECK: %[[WideBVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %[[BAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK: %[[BAddr:.*]] = getelementptr inbounds [1024 x i32], ptr @B, i64 0, <4 x i64> %[[InnerInd]]
+; CHECK: %[[WideBVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[BAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
 ; CHECK: %[[Add1:.*]] = add nsw <4 x i32> %[[WideBVal]], %[[VecIndTr]]
 ; CHECK: %[[AccumPhiNext]] = add nsw <4 x i32> %[[Add1]], %[[AccumPhi]]
 ; CHECK: %[[InnerIndNext]] = add nuw nsw <4 x i64> %[[InnerInd]], <i64 1, i64 1, i64 1, i64 1>
@@ -45,7 +45,7 @@
 
 ; CHECK: [[InnerCrit]]:
 ; CHECK: %[[StorePhi:.*]] = phi <4 x i32> [ %[[AccumPhiNext]], %[[InnerForBody]] ]
-; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[StorePhi]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StorePhi]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 ; CHECK:  br label %[[ForInc]]
 
 ; CHECK: [[ForInc]]:
@@ -71,20 +71,20 @@ for.body.lr.ph:                                   ; preds = %entry
 
 for.body:                                         ; preds = %for.inc9, %for.body.lr.ph
   %indvars.iv25 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next26, %for.inc9 ]
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv25
-  store i32 %c, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv25
+  store i32 %c, ptr %arrayidx, align 4
   br i1 %cmp220, label %for.body3.lr.ph, label %for.inc9
 
 for.body3.lr.ph:                                  ; preds = %for.body
-  %arrayidx.promoted = load i32, i32* %arrayidx, align 4
+  %arrayidx.promoted = load i32, ptr %arrayidx, align 4
   %0 = trunc i64 %indvars.iv25 to i32
   br label %for.body3
 
 for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
   %indvars.iv = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
   %1 = phi i32 [ %arrayidx.promoted, %for.body3.lr.ph ], [ %add8, %for.body3 ]
-  %arrayidx5 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
-  %2 = load i32, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx5, align 4
   %add = add nsw i32 %2, %0
   %add8 = add nsw i32 %add, %1
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -92,7 +92,7 @@ for.body3:                                        ; preds = %for.body3, %for.bod
   br i1 %exitcond, label %for.cond1.for.inc9_crit_edge, label %for.body3
 
 for.cond1.for.inc9_crit_edge:                     ; preds = %for.body3
-  store i32 %add8, i32* %arrayidx, align 4
+  store i32 %add8, ptr %arrayidx, align 4
   br label %for.inc9
 
 for.inc9:                                         ; preds = %for.cond1.for.inc9_crit_edge, %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll b/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll
index 8010b4b6d6ec0..d700d484c56bc 100644
--- a/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll
+++ b/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll
@@ -5,12 +5,12 @@
 ; CHECK-LABEL: @foo
 ; CHECK-LABEL: for.end.inner.loopexit:
 ; CHECK: %[[LCSSAPHI:.*]] = phi i64 [ %indvars.iv, %for.body.inner ], [ %{{.*}}, %middle.block ]
-; CHECK: store i64 %[[LCSSAPHI]], i64* %O1, align 4
+; CHECK: store i64 %[[LCSSAPHI]], ptr %O1, align 4
 ; CHECK-LABEL: for.end.outer.loopexit
-; CHECK: store i64 %indvars.outer, i64* %O2, align 4
+; CHECK: store i64 %indvars.outer, ptr %O2, align 4
 
 
-define i64 @foo(i32* nocapture %A, i32* nocapture %B, i64 %n, i64 %m, i64* %O1, i64* %O2) {
+define i64 @foo(ptr nocapture %A, ptr nocapture %B, i64 %n, i64 %m, ptr %O1, ptr %O2) {
 entry:
   %cmp = icmp sgt i64 %n, 0
   br i1 %cmp, label %for.body.outer.preheader, label %for.end.outer
@@ -28,16 +28,16 @@ for.body.inner.preheader:                         ; preds = %for.body.outer
 
 for.body.inner:                                   ; preds = %for.body.inner.preheader, %for.body.inner
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body.inner ], [ 0, %for.body.inner.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %v = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  store i32 %v, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %v = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  store i32 %v, ptr %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv, %n
   br i1 %exitcond, label %for.end.inner.loopexit, label %for.body.inner
 
 for.end.inner.loopexit:                           ; preds = %for.body.inner
-  store i64 %indvars.iv, i64 *%O1, align 4
+  store i64 %indvars.iv, ptr %O1, align 4
   br label %for.end.inner
 
 for.end.inner:                                    ; preds = %for.end.inner.loopexit, %for.body.outer
@@ -46,7 +46,7 @@ for.end.inner:                                    ; preds = %for.end.inner.loope
   br i1 %exitcond.outer, label %for.end.outer.loopexit, label %for.body.outer
 
 for.end.outer.loopexit:                           ; preds = %for.end.inner
-  store i64 %indvars.outer, i64 *%O2, align 4
+  store i64 %indvars.outer, ptr %O2, align 4
   br label %for.end.outer
 
 for.end.outer:                                    ; preds = %for.end.outer.loopexit, %entry

diff  --git a/llvm/test/Transforms/LoopVectorize/phi-cost.ll b/llvm/test/Transforms/LoopVectorize/phi-cost.ll
index d8f740068cb7e..e571b624ed194 100644
--- a/llvm/test/Transforms/LoopVectorize/phi-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/phi-cost.ll
@@ -9,22 +9,22 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 ; CHECK:       define void @phi_two_incoming_values(
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
-; CHECK:         [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* {{.*}}
+; CHECK:         [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr {{.*}}
 ; CHECK:         [[TMP5:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i32>
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = add <2 x i32> [[WIDE_LOAD]], [[TMP6]]
-; CHECK:         store <2 x i32> [[PREDPHI]], <2 x i32>* {{.*}}
+; CHECK:         store <2 x i32> [[PREDPHI]], ptr {{.*}}
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ;
-define void @phi_two_incoming_values(i32* %a, i32* %b, i64 %n) {
+define void @phi_two_incoming_values(ptr %a, ptr %b, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ]
-  %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
-  %tmp1 = load i32, i32* %tmp0, align 4
-  %tmp2 = getelementptr inbounds i32, i32* %b, i64 %i
+  %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i
+  %tmp1 = load i32, ptr %tmp0, align 4
+  %tmp2 = getelementptr inbounds i32, ptr %b, i64 %i
   %tmp3 = icmp sgt i32 %tmp1, 0
   br i1 %tmp3, label %if.then, label %if.end
 
@@ -34,7 +34,7 @@ if.then:
 
 if.end:
   %tmp5 = phi i32 [ %tmp1, %for.body ], [ %tmp4, %if.then ]
-  store i32 %tmp5, i32* %tmp2, align 4
+  store i32 %tmp5, ptr %tmp2, align 4
   %i.next = add i64 %i, 1
   %cond = icmp eq i64 %i, %n
   br i1 %cond, label %for.end, label %for.body
@@ -51,19 +51,19 @@ for.end:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
 ; CHECK:         [[PREDPHI:%.*]] = select <2 x i1> {{.*}}, <2 x i32> <i32 3, i32 3>, <2 x i32> <i32 9, i32 9>
 ; CHECK:         [[PREDPHI7:%.*]] = select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> [[PREDPHI]]
-; CHECK:         store <2 x i32> [[PREDPHI7]], <2 x i32>* {{.*}}
+; CHECK:         store <2 x i32> [[PREDPHI7]], ptr {{.*}}
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ;
-define void @phi_three_incoming_values(i32* %a, i32* %b, i64 %n) {
+define void @phi_three_incoming_values(ptr %a, ptr %b, i64 %n) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ]
-  %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
-  %tmp1 = load i32, i32* %tmp0, align 4
-  %tmp2 = getelementptr inbounds i32, i32* %b, i64 %i
-  %tmp3 = load i32, i32* %tmp2, align 4
+  %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i
+  %tmp1 = load i32, ptr %tmp0, align 4
+  %tmp2 = getelementptr inbounds i32, ptr %b, i64 %i
+  %tmp3 = load i32, ptr %tmp2, align 4
   %tmp4 = icmp sgt i32 %tmp1, %tmp3
   br i1 %tmp4, label %if.then, label %if.end
 
@@ -78,7 +78,7 @@ if.else:
 
 if.end:
   %tmp8 = phi i32 [ 9, %for.body ], [ 3, %if.then ], [ %tmp7, %if.else ]
-  store i32 %tmp8, i32* %tmp0, align 4
+  store i32 %tmp8, ptr %tmp0, align 4
   %i.next = add i64 %i, 1
   %cond = icmp eq i64 %i, %n
   br i1 %cond, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/pr25281.ll b/llvm/test/Transforms/LoopVectorize/pr25281.ll
index d062b4689d596..06031d7e925af 100644
--- a/llvm/test/Transforms/LoopVectorize/pr25281.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr25281.ll
@@ -4,16 +4,14 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; PR25281
 ; Just check that we don't crash on this test.
 ; CHECK-LABEL: @foo
-define void @foo(float** noalias nocapture readonly %in, i32* noalias nocapture readonly %isCompressed, float* noalias nocapture readonly %out) {
+define void @foo(ptr noalias nocapture readonly %in, ptr noalias nocapture readonly %isCompressed, ptr noalias nocapture readonly %out) {
 entry_block:
-  %tmp = getelementptr float*, float** %in, i32 0
-  %in_0 = load float*, float** %tmp
-  %tmp1 = getelementptr i32, i32* %isCompressed, i32 0
-  %isCompressed_0 = load i32, i32* %tmp1
-  %tmp2 = getelementptr float*, float** %in, i32 1
-  %in_1 = load float*, float** %tmp2
-  %tmp3 = getelementptr i32, i32* %isCompressed, i32 1
-  %isCompressed_1 = load i32, i32* %tmp3
+  %in_0 = load ptr, ptr %in
+  %isCompressed_0 = load i32, ptr %isCompressed
+  %tmp2 = getelementptr ptr, ptr %in, i32 1
+  %in_1 = load ptr, ptr %tmp2
+  %tmp3 = getelementptr i32, ptr %isCompressed, i32 1
+  %isCompressed_1 = load i32, ptr %tmp3
   br label %for_each_frames
 
 for_each_frames:
@@ -28,16 +26,16 @@ for_each_channel:
   %tmp5 = xor i32 %isCompressed_0, 1
   %tmp6 = mul i32 %frameIndex, %tmp5
   %offset0 = add i32 %tmp6, %channelIndex
-  %tmp7 = getelementptr float, float* %in_0, i32 %offset0
-  %in_0_index = load float, float* %tmp7, align 4
+  %tmp7 = getelementptr float, ptr %in_0, i32 %offset0
+  %in_0_index = load float, ptr %tmp7, align 4
   %tmp8 = xor i32 %isCompressed_1, 1
   %tmp9 = mul i32 %frameIndex, %tmp8
   %offset1 = add i32 %tmp9, %channelIndex
-  %tmp10 = getelementptr float, float* %in_1, i32 %offset1
-  %in_1_index = load float, float* %tmp10, align 4
+  %tmp10 = getelementptr float, ptr %in_1, i32 %offset1
+  %in_1_index = load float, ptr %tmp10, align 4
   %tmp11 = fadd float %in_0_index, %in_1_index
-  %tmp12 = getelementptr float, float* %out, i32 %tmp4
-  store float %tmp11, float* %tmp12, align 4
+  %tmp12 = getelementptr float, ptr %out, i32 %tmp4
+  store float %tmp11, ptr %tmp12, align 4
   %tmp13 = icmp eq i32 %nextChannelIndex, 2
   br i1 %tmp13, label %for_each_frames_end, label %for_each_channel
 

diff  --git a/llvm/test/Transforms/LoopVectorize/pr28541.ll b/llvm/test/Transforms/LoopVectorize/pr28541.ll
index 73f02a3a27d20..ad7f6e7b16b16 100644
--- a/llvm/test/Transforms/LoopVectorize/pr28541.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr28541.ll
@@ -30,14 +30,14 @@
 
 define i32 @fn1() {
 entry:
-  %tmp2 = load i32, i32* @b, align 4
+  %tmp2 = load i32, ptr @b, align 4
   %dec3 = add nsw i32 %tmp2, -1
-  store i32 %dec3, i32* @b, align 4
+  store i32 %dec3, ptr @b, align 4
   %tobool4 = icmp eq i32 %tmp2, 0
   br i1 %tobool4, label %while.end, label %while.body.lr.ph
 
 while.body.lr.ph:                                 ; preds = %entry
-  %tmp1 = load i32, i32* @a, align 4
+  %tmp1 = load i32, ptr @a, align 4
   %and = and i32 %tmp1, 3
   %switch = icmp eq i32 %and, 0
   br label %while.body
@@ -62,8 +62,8 @@ do.cond:                                          ; preds = %do.body, %while.bod
   br i1 %tobool3, label %while.cond, label %do.body
 
 while.cond.while.end_crit_edge:                   ; preds = %while.cond
-  store i32 0, i32* @c, align 4
-  store i32 -1, i32* @b, align 4
+  store i32 0, ptr @c, align 4
+  store i32 -1, ptr @b, align 4
   br label %while.end
 
 while.end:                                        ; preds = %while.cond.while.end_crit_edge, %entry

diff  --git a/llvm/test/Transforms/LoopVectorize/pr30806-phi-scev.ll b/llvm/test/Transforms/LoopVectorize/pr30806-phi-scev.ll
index 393e7bb2c50b9..7fa17850d8b1b 100644
--- a/llvm/test/Transforms/LoopVectorize/pr30806-phi-scev.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr30806-phi-scev.ll
@@ -24,17 +24,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 @theSize = external local_unnamed_addr global i32, align 4
 
-define void @foo(i8* %buf, i32 %denominator, i32* %flag) local_unnamed_addr {
+define void @foo(ptr %buf, i32 %denominator, ptr %flag) local_unnamed_addr {
 entry:
   %i = alloca i32, align 4
-  %0 = load i32, i32* @theSize, align 4
+  %0 = load i32, ptr @theSize, align 4
   %div = udiv i32 %0, %denominator
-  %1 = load i32, i32* %flag, align 4
+  %1 = load i32, ptr %flag, align 4
   %tobool5 = icmp eq i32 %1, 0
   br i1 %tobool5, label %while.end, label %while.body.lr.ph
 
 while.body.lr.ph:                                 ; preds = %entry
-  %2 = bitcast i32* %i to i8*
   br label %while.body
 
 while.body:                                       ; preds = %while.body.lr.ph, %while.body
@@ -43,16 +42,16 @@ while.body:                                       ; preds = %while.body.lr.ph, %
 ; CHECK:        phi
 ; CHECK-NEXT:   phi
 ; CHECK-NEXT:   sext
-  %buf.addr.07 = phi i8* [ %buf, %while.body.lr.ph ], [ %call, %while.body ]
+  %buf.addr.07 = phi ptr [ %buf, %while.body.lr.ph ], [ %call, %while.body ]
   %inx.06 = phi i32 [ 0, %while.body.lr.ph ], [ %add, %while.body ]
   %add = add nsw i32 %inx.06, %div
-  %3 = load i32, i32* @theSize, align 4
-  store i32 %3, i32* %i, align 4
+  %2 = load i32, ptr @theSize, align 4
+  store i32 %2, ptr %i, align 4
   %conv = sext i32 %add to i64
-  call void @bar(i32* nonnull %i, i64 %conv)
-  %call = call i8* @processBuf(i8* %buf.addr.07)
-  %4 = load i32, i32* %flag, align 4
-  %tobool = icmp eq i32 %4, 0
+  call void @bar(ptr nonnull %i, i64 %conv)
+  %call = call ptr @processBuf(ptr %buf.addr.07)
+  %3 = load i32, ptr %flag, align 4
+  %tobool = icmp eq i32 %3, 0
   br i1 %tobool, label %while.end.loopexit, label %while.body
 
 while.end.loopexit:                               ; preds = %while.body
@@ -62,5 +61,5 @@ while.end:                                        ; preds = %while.end.loopexit,
   ret void
 }
 
-declare void @bar(i32*, i64) local_unnamed_addr
-declare i8* @processBuf(i8*) local_unnamed_addr
+declare void @bar(ptr, i64) local_unnamed_addr
+declare ptr @processBuf(ptr) local_unnamed_addr

diff  --git a/llvm/test/Transforms/LoopVectorize/pr31098.ll b/llvm/test/Transforms/LoopVectorize/pr31098.ll
index 828c191745b7e..e983f53d87b04 100644
--- a/llvm/test/Transforms/LoopVectorize/pr31098.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr31098.ll
@@ -60,7 +60,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 %class.Complex = type { float, float }
 
-define void @Test(%class.Complex* nocapture %out, i64 %size) local_unnamed_addr {
+define void @Test(ptr nocapture %out, i64 %size) local_unnamed_addr {
 entry:
   %div = lshr i64 %size, 1
   %cmp47 = icmp eq i64 %div, 0
@@ -77,23 +77,23 @@ for.cond.cleanup:
 
 for.body:
   %offset.048 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %0 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.048, i32 0
-  %1 = load float, float* %0, align 4
-  %imaginary_.i.i = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.048, i32 1
-  %2 = load float, float* %imaginary_.i.i, align 4
+  %0 = getelementptr inbounds %class.Complex, ptr %out, i64 %offset.048, i32 0
+  %1 = load float, ptr %0, align 4
+  %imaginary_.i.i = getelementptr inbounds %class.Complex, ptr %out, i64 %offset.048, i32 1
+  %2 = load float, ptr %imaginary_.i.i, align 4
   %add = add nuw i64 %offset.048, %div
-  %3 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %add, i32 0
-  %4 = load float, float* %3, align 4
-  %imaginary_.i.i28 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %add, i32 1
-  %5 = load float, float* %imaginary_.i.i28, align 4
+  %3 = getelementptr inbounds %class.Complex, ptr %out, i64 %add, i32 0
+  %4 = load float, ptr %3, align 4
+  %imaginary_.i.i28 = getelementptr inbounds %class.Complex, ptr %out, i64 %add, i32 1
+  %5 = load float, ptr %imaginary_.i.i28, align 4
   %add.i = fadd fast float %4, %1
   %add4.i = fadd fast float %5, %2
-  store float %add.i, float* %0, align 4
-  store float %add4.i, float* %imaginary_.i.i, align 4
+  store float %add.i, ptr %0, align 4
+  store float %add4.i, ptr %imaginary_.i.i, align 4
   %sub.i = fsub fast float %1, %4
   %sub4.i = fsub fast float %2, %5
-  store float %sub.i, float* %3, align 4
-  store float %sub4.i, float* %imaginary_.i.i28, align 4
+  store float %sub.i, ptr %3, align 4
+  store float %sub4.i, ptr %imaginary_.i.i28, align 4
   %inc = add nuw nsw i64 %offset.048, 1
   %exitcond = icmp eq i64 %inc, %div
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/pr31190.ll b/llvm/test/Transforms/LoopVectorize/pr31190.ll
index c9790be82209b..b6cacf1a7fe87 100644
--- a/llvm/test/Transforms/LoopVectorize/pr31190.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr31190.ll
@@ -36,8 +36,8 @@
 ; CHECK-LABEL: @test
 define void @test() {
 entry:
-  %a.promoted2 = load i32, i32* @a, align 1
-  %c.promoted = load i32, i32* @c, align 1
+  %a.promoted2 = load i32, ptr @a, align 1
+  %c.promoted = load i32, ptr @c, align 1
   br label %for.cond1.preheader
 
 for.cond1.preheader:                              ; preds = %for.cond1.for.inc4_crit_edge, %entry
@@ -49,8 +49,8 @@ for.body3:                                        ; preds = %for.body3, %for.con
   %inc1 = phi i32 [ %inc.lcssa3, %for.cond1.preheader ], [ %inc, %for.body3 ]
   %0 = phi i32 [ undef, %for.cond1.preheader ], [ %inc54, %for.body3 ]
   %idxprom = sext i32 %0 to i64
-  %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* @b, i64 0, i64 %idxprom
-  store i32 4, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1 x i32], ptr @b, i64 0, i64 %idxprom
+  store i32 4, ptr %arrayidx, align 4
   %inc = add nsw i32 %inc1, 1
   %tobool2 = icmp eq i32 %inc, 0
   br i1 %tobool2, label %for.cond1.for.inc4_crit_edge, label %for.body3

diff  --git a/llvm/test/Transforms/LoopVectorize/pr33706.ll b/llvm/test/Transforms/LoopVectorize/pr33706.ll
index fe314d75d2a46..20d58f4cf47ff 100644
--- a/llvm/test/Transforms/LoopVectorize/pr33706.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr33706.ll
@@ -6,17 +6,17 @@
 
 ; CHECK-LABEL: @PR33706
 ; CHECK-NOT: <2 x i32>
-define void @PR33706(float* nocapture readonly %arg, float* nocapture %arg1, i32 %arg2) local_unnamed_addr {
+define void @PR33706(ptr nocapture readonly %arg, ptr nocapture %arg1, i32 %arg2) local_unnamed_addr {
 bb:
-  %tmp = load i32, i32* @global.1, align 4
-  %tmp3 = getelementptr inbounds float, float* %arg, i64 190
-  %tmp4 = getelementptr inbounds float, float* %arg1, i64 512
+  %tmp = load i32, ptr @global.1, align 4
+  %tmp3 = getelementptr inbounds float, ptr %arg, i64 190
+  %tmp4 = getelementptr inbounds float, ptr %arg1, i64 512
   %tmp5 = and i32 %tmp, 65535
   %tmp6 = icmp ugt i32 %arg2, 65536
   br i1 %tmp6, label %bb7, label %bb9
 
 bb7:                                              ; preds = %bb
-  %tmp8 = load i32, i32* @global, align 4
+  %tmp8 = load i32, ptr @global, align 4
   br label %bb27
 
 bb9:                                              ; preds = %bb
@@ -25,37 +25,37 @@ bb9:                                              ; preds = %bb
 
 bb11:                                             ; preds = %bb11, %bb9
   %tmp12 = phi i32 [ %tmp20, %bb11 ], [ %tmp5, %bb9 ]
-  %tmp13 = phi float* [ %tmp18, %bb11 ], [ %tmp4, %bb9 ]
+  %tmp13 = phi ptr [ %tmp18, %bb11 ], [ %tmp4, %bb9 ]
   %tmp14 = phi i32 [ %tmp16, %bb11 ], [ %tmp10, %bb9 ]
   %tmp15 = phi i32 [ %tmp19, %bb11 ], [ %tmp, %bb9 ]
   %tmp16 = add nsw i32 %tmp14, -1
   %tmp17 = sitofp i32 %tmp12 to float
-  store float %tmp17, float* %tmp13, align 4
-  %tmp18 = getelementptr inbounds float, float* %tmp13, i64 1
+  store float %tmp17, ptr %tmp13, align 4
+  %tmp18 = getelementptr inbounds float, ptr %tmp13, i64 1
   %tmp19 = add i32 %tmp15, %arg2
   %tmp20 = and i32 %tmp19, 65535
   %tmp21 = icmp eq i32 %tmp16, 0
   br i1 %tmp21, label %bb22, label %bb11
 
 bb22:                                             ; preds = %bb11
-  %tmp23 = phi float* [ %tmp18, %bb11 ]
+  %tmp23 = phi ptr [ %tmp18, %bb11 ]
   %tmp24 = phi i32 [ %tmp19, %bb11 ]
   %tmp25 = phi i32 [ %tmp20, %bb11 ]
   %tmp26 = ashr i32 %tmp24, 16
-  store i32 %tmp26, i32* @global, align 4
+  store i32 %tmp26, ptr @global, align 4
   br label %bb27
 
 bb27:                                             ; preds = %bb22, %bb7
   %tmp28 = phi i32 [ %tmp26, %bb22 ], [ %tmp8, %bb7 ]
-  %tmp29 = phi float* [ %tmp23, %bb22 ], [ %tmp4, %bb7 ]
+  %tmp29 = phi ptr [ %tmp23, %bb22 ], [ %tmp4, %bb7 ]
   %tmp30 = phi i32 [ %tmp25, %bb22 ], [ %tmp5, %bb7 ]
   %tmp31 = sext i32 %tmp28 to i64
-  %tmp32 = getelementptr inbounds float, float* %tmp3, i64 %tmp31
-  %tmp33 = load float, float* %tmp32, align 4
+  %tmp32 = getelementptr inbounds float, ptr %tmp3, i64 %tmp31
+  %tmp33 = load float, ptr %tmp32, align 4
   %tmp34 = sitofp i32 %tmp30 to float
-  %tmp35 = load float, float* @global.2, align 4
+  %tmp35 = load float, ptr @global.2, align 4
   %tmp36 = fmul float %tmp35, %tmp34
   %tmp37 = fadd float %tmp33, %tmp36
-  store float %tmp37, float* %tmp29, align 4
+  store float %tmp37, ptr %tmp29, align 4
   ret void
 }

diff  --git a/llvm/test/Transforms/LoopVectorize/pr34681.ll b/llvm/test/Transforms/LoopVectorize/pr34681.ll
index 8ee43d4107a5c..1c2b37e2ce2fb 100644
--- a/llvm/test/Transforms/LoopVectorize/pr34681.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr34681.ll
@@ -32,7 +32,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK: scalar.ph
 
 
-define i32 @foo1(i32 %N, i16* nocapture readnone %A, i16* nocapture readonly %B, i32 %i, i32 %j)  {
+define i32 @foo1(i32 %N, ptr nocapture readnone %A, ptr nocapture readonly %B, i32 %i, i32 %j)  {
 entry:
   %cmp8 = icmp eq i32 %N, 0
   br i1 %cmp8, label %for.end, label %for.body.lr.ph
@@ -45,8 +45,8 @@ for.body:
   %k.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %mul = mul i32 %k.09, %N
   %add = add i32 %mul, %j
-  %arrayidx = getelementptr inbounds i16, i16* %B, i32 %add
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
+  %0 = load i16, ptr %arrayidx, align 2
   %conv = sext i16 %0 to i32
   %add1 = add nsw i32 %tmp.010, %conv
   %inc = add nuw i32 %k.09, 1
@@ -92,7 +92,7 @@ for.end:
 ; CHECK: middle.block
 ; CHECK: scalar.ph
 
-define i32 @foo2(i16 zeroext %N, i16* nocapture readnone %A, i16* nocapture readonly %B, i32 %i, i32 %j) {
+define i32 @foo2(i16 zeroext %N, ptr nocapture readnone %A, ptr nocapture readonly %B, i32 %i, i32 %j) {
 entry:
   %conv = zext i16 %N to i32
   %cmp11 = icmp eq i16 %N, 0
@@ -106,8 +106,8 @@ for.body:
   %k.012 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %mul = mul nuw i32 %k.012, %conv
   %add = add i32 %mul, %j
-  %arrayidx = getelementptr inbounds i16, i16* %B, i32 %add
-  %0 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
+  %0 = load i16, ptr %arrayidx, align 2
   %conv3 = sext i16 %0 to i32
   %add4 = add nsw i32 %tmp.013, %conv3
   %inc = add nuw nsw i32 %k.012, 1

diff  --git a/llvm/test/Transforms/LoopVectorize/pr36311.ll b/llvm/test/Transforms/LoopVectorize/pr36311.ll
index 9d61af6cf9af8..442004e8c082e 100644
--- a/llvm/test/Transforms/LoopVectorize/pr36311.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr36311.ll
@@ -13,7 +13,7 @@ $test = comdat any
 declare i32 @__gxx_personality_v0(...)
 
 ; Function Attrs: uwtable
-define dso_local void @test() local_unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define dso_local void @test() local_unnamed_addr #0 comdat align 2 personality ptr @__gxx_personality_v0 {
 entry:
   br label %for.body51
 
@@ -38,8 +38,8 @@ for.body89:                                       ; preds = %for.body89, %for.bo
   %add93 = add i32 %add92, %mul91
   %inc94 = add i32 %next_index.5174, 1
   %conv95 = zext i32 %next_index.5174 to i64
-  %arrayidx.i160 = getelementptr inbounds i32, i32* undef, i64 %conv95
-  store i32 %add93, i32* %arrayidx.i160, align 4
+  %arrayidx.i160 = getelementptr inbounds i32, ptr undef, i64 %conv95
+  store i32 %add93, ptr %arrayidx.i160, align 4
 ;, !tbaa !1
   %cmp87 = icmp ult i32 %add92, undef
   br i1 %cmp87, label %for.body89, label %for.cond80.loopexit

diff  --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll
index c9d22bec660ab..64c102febb36e 100644
--- a/llvm/test/Transforms/LoopVectorize/pr37248.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll
@@ -26,13 +26,13 @@ for.body:                                         ; preds = %land.end, %entry
   br i1 undef, label %land.end, label %land.rhs
 
 land.rhs:                                         ; preds = %for.body
-  %1 = load i32, i32* undef, align 1
+  %1 = load i32, ptr undef, align 1
   br label %land.end
 
 land.end:                                         ; preds = %land.rhs, %for.body
   %2 = trunc i32 %0 to i16
-  %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* @a, i16 0, i16 %2
-  store i16 undef, i16* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 %2
+  store i16 undef, ptr %arrayidx, align 1
   %dec = add nsw i32 %0, -1
   %cmp = icmp sgt i32 %0, 1
   br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge

diff  --git a/llvm/test/Transforms/LoopVectorize/pr39099.ll b/llvm/test/Transforms/LoopVectorize/pr39099.ll
index 7f91eeed14579..9c2eb3d1ebe4e 100644
--- a/llvm/test/Transforms/LoopVectorize/pr39099.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr39099.ll
@@ -10,7 +10,7 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 ; CHECK: LV: Analyzing interleaved accesses...
 ; CHECK-NOT: LV: Creating an interleave group
 
-define dso_local void @masked_strided(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {
+define dso_local void @masked_strided(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {
 entry:
   %conv = zext i8 %guard to i32
   br label %for.body
@@ -22,14 +22,14 @@ for.body:
 
 if.then:
   %mul = shl nuw nsw i32 %ix.017, 1
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx4 = getelementptr inbounds i8, i8* %q, i32 %mul
-  store i8 %0, i8* %arrayidx4, align 1
+  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
+  %0 = load i8, ptr %arrayidx, align 1
+  %arrayidx4 = getelementptr inbounds i8, ptr %q, i32 %mul
+  store i8 %0, ptr %arrayidx4, align 1
   %sub = sub i8 0, %0
   %add = or i32 %mul, 1
-  %arrayidx8 = getelementptr inbounds i8, i8* %q, i32 %add
-  store i8 %sub, i8* %arrayidx8, align 1
+  %arrayidx8 = getelementptr inbounds i8, ptr %q, i32 %add
+  store i8 %sub, ptr %arrayidx8, align 1
   br label %for.inc
 
 for.inc:

diff  --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
index d01f9770a8b9e..06f70a4cccb5d 100644
--- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
@@ -16,7 +16,7 @@ define i16 @test_true_and_false_branch_equal() {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE2:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* @v_38, align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr @v_38, align 1
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT]], zeroinitializer
@@ -39,7 +39,7 @@ define i16 @test_true_and_false_branch_equal() {
 ; CHECK-NEXT:    [[TMP10:%.*]] = phi <2 x i16> [ [[TMP6]], [[PRED_SREM_CONTINUE]] ], [ [[TMP9]], [[PRED_SREM_IF1]] ]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> <i16 5786, i16 5786>, <2 x i16> [[TMP10]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1
-; CHECK-NEXT:    store i16 [[TMP11]], i16* @v_39, align 1
+; CHECK-NEXT:    store i16 [[TMP11]], ptr @v_39, align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -51,7 +51,7 @@ define i16 @test_true_and_false_branch_equal() {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_07:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC7:%.*]], [[FOR_LATCH:%.*]] ]
-; CHECK-NEXT:    [[LV:%.*]] = load i16, i16* @v_38, align 1
+; CHECK-NEXT:    [[LV:%.*]] = load i16, ptr @v_38, align 1
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i16 [[LV]], 32767
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[COND_END:%.*]], label [[COND_END]]
 ; CHECK:       cond.end:
@@ -62,12 +62,12 @@ define i16 @test_true_and_false_branch_equal() {
 ; CHECK-NEXT:    br label [[FOR_LATCH]]
 ; CHECK:       for.latch:
 ; CHECK-NEXT:    [[COND6:%.*]] = phi i16 [ [[REM]], [[COND_FALSE4]] ], [ 5786, [[COND_END]] ]
-; CHECK-NEXT:    store i16 [[COND6]], i16* @v_39, align 1
+; CHECK-NEXT:    store i16 [[COND6]], ptr @v_39, align 1
 ; CHECK-NEXT:    [[INC7]] = add nsw i16 [[I_07]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i16 [[INC7]], 111
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[RV:%.*]] = load i16, i16* @v_39, align 1
+; CHECK-NEXT:    [[RV:%.*]] = load i16, ptr @v_39, align 1
 ; CHECK-NEXT:    ret i16 [[RV]]
 ;
 entry:
@@ -75,7 +75,7 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.latch
   %i.07 = phi i16 [ 99, %entry ], [ %inc7, %for.latch ]
-  %lv = load i16, i16* @v_38, align 1
+  %lv = load i16, ptr @v_38, align 1
   %cmp1 = icmp eq i16 %lv, 32767
   br i1 %cmp1, label %cond.end, label %cond.end
 
@@ -89,12 +89,12 @@ cond.false4:                                      ; preds = %cond.end
 
 for.latch:                                        ; preds = %cond.end, %cond.false4
   %cond6 = phi i16 [ %rem, %cond.false4 ], [ 5786, %cond.end ]
-  store i16 %cond6, i16* @v_39, align 1
+  store i16 %cond6, ptr @v_39, align 1
   %inc7 = add nsw i16 %i.07, 1
   %cmp = icmp slt i16 %inc7, 111
   br i1 %cmp, label %for.body, label %exit
 
 exit:                                 ; preds = %for.latch
-  %rv = load i16, i16* @v_39, align 1
+  %rv = load i16, ptr @v_39, align 1
   ret i16 %rv
 }

diff  --git a/llvm/test/Transforms/LoopVectorize/pr45525.ll b/llvm/test/Transforms/LoopVectorize/pr45525.ll
index 23034210c5bc9..e52d98b63abfd 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45525.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45525.ll
@@ -2,7 +2,7 @@
 
 ; Test case for PR45525. Checks that phi's with a single predecessor and a mask are supported.
 
-define void @main(i1 %cond, i32* %arr) {
+define void @main(i1 %cond, ptr %arr) {
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  bb.0:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -26,8 +26,8 @@ bb.2:                                             ; preds = %bb.1
 
 bb.3:                                             ; preds = %bb.2, %bb.1
   %stored.value = phi i32 [ 7, %bb.1 ], [ %mult, %bb.2 ]
-  %arrayidx = getelementptr inbounds i32, i32* %arr, i32 %iv
-  store i32 %stored.value, i32* %arrayidx
+  %arrayidx = getelementptr inbounds i32, ptr %arr, i32 %iv
+  store i32 %stored.value, ptr %arrayidx
   %iv.next = add i32 %iv, 1
   %continue = icmp ult i32 %iv.next, 32
   br i1 %continue, label %bb.1, label %bb.4

diff  --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index 524bd213997ed..e81fb66239bd4 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -10,7 +10,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 ; -force-vector-interleave, but is a multiple of the internally computed MaxVF;
 ; e.g., when all types are i32 lead to MaxVF=1.
 
-define void @pr45679(i32* %A) optsize {
+define void @pr45679(ptr %A) optsize {
 ; CHECK-LABEL: @pr45679(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -24,32 +24,32 @@ define void @pr45679(i32* %A) optsize {
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP2]]
-; CHECK-NEXT:    store i32 13, i32* [[TMP3]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP2]]
+; CHECK-NEXT:    store i32 13, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
 ; CHECK:       pred.store.if1:
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP5]]
-; CHECK-NEXT:    store i32 13, i32* [[TMP6]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]]
+; CHECK-NEXT:    store i32 13, ptr [[TMP6]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.continue2:
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; CHECK:       pred.store.if3:
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP8]]
-; CHECK-NEXT:    store i32 13, i32* [[TMP9]], align 1
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP8]]
+; CHECK-NEXT:    store i32 13, ptr [[TMP9]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; CHECK:       pred.store.continue4:
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.if5:
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP11]]
-; CHECK-NEXT:    store i32 13, i32* [[TMP12]], align 1
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
+; CHECK-NEXT:    store i32 13, ptr [[TMP12]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
@@ -63,8 +63,8 @@ define void @pr45679(i32* %A) optsize {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]]
-; CHECK-NEXT:    store i32 13, i32* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]]
+; CHECK-NEXT:    store i32 13, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
 ; CHECK-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -86,32 +86,32 @@ define void @pr45679(i32* %A) optsize {
 ; VF2UF2-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF2UF2:       pred.store.if:
 ; VF2UF2-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
-; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP3]]
-; VF2UF2-NEXT:    store i32 13, i32* [[TMP4]], align 1
+; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]]
+; VF2UF2-NEXT:    store i32 13, ptr [[TMP4]], align 1
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VF2UF2:       pred.store.continue:
 ; VF2UF2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
 ; VF2UF2-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
 ; VF2UF2:       pred.store.if2:
 ; VF2UF2-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
-; VF2UF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP6]]
-; VF2UF2-NEXT:    store i32 13, i32* [[TMP7]], align 1
+; VF2UF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP6]]
+; VF2UF2-NEXT:    store i32 13, ptr [[TMP7]], align 1
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; VF2UF2:       pred.store.continue3:
 ; VF2UF2-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
 ; VF2UF2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
 ; VF2UF2:       pred.store.if4:
 ; VF2UF2-NEXT:    [[TMP9:%.*]] = add i32 [[INDEX]], 2
-; VF2UF2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP9]]
-; VF2UF2-NEXT:    store i32 13, i32* [[TMP10]], align 1
+; VF2UF2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP9]]
+; VF2UF2-NEXT:    store i32 13, ptr [[TMP10]], align 1
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
 ; VF2UF2:       pred.store.continue5:
 ; VF2UF2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
 ; VF2UF2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
 ; VF2UF2:       pred.store.if6:
 ; VF2UF2-NEXT:    [[TMP12:%.*]] = add i32 [[INDEX]], 3
-; VF2UF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP12]]
-; VF2UF2-NEXT:    store i32 13, i32* [[TMP13]], align 1
+; VF2UF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP12]]
+; VF2UF2-NEXT:    store i32 13, ptr [[TMP13]], align 1
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
 ; VF2UF2:       pred.store.continue7:
 ; VF2UF2-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
@@ -125,8 +125,8 @@ define void @pr45679(i32* %A) optsize {
 ; VF2UF2-NEXT:    br label [[LOOP:%.*]]
 ; VF2UF2:       loop:
 ; VF2UF2-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
-; VF2UF2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]]
-; VF2UF2-NEXT:    store i32 13, i32* [[ARRAYIDX]], align 1
+; VF2UF2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]]
+; VF2UF2-NEXT:    store i32 13, ptr [[ARRAYIDX]], align 1
 ; VF2UF2-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
 ; VF2UF2-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
 ; VF2UF2-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -151,29 +151,29 @@ define void @pr45679(i32* %A) optsize {
 ; VF1UF4-NEXT:    br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF1UF4:       pred.store.if:
 ; VF1UF4-NEXT:    [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
-; VF1UF4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDUCTION]]
-; VF1UF4-NEXT:    store i32 13, i32* [[TMP4]], align 1
+; VF1UF4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDUCTION]]
+; VF1UF4-NEXT:    store i32 13, ptr [[TMP4]], align 1
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VF1UF4:       pred.store.continue:
 ; VF1UF4-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
 ; VF1UF4:       pred.store.if4:
 ; VF1UF4-NEXT:    [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1
-; VF1UF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]]
-; VF1UF4-NEXT:    store i32 13, i32* [[TMP5]], align 1
+; VF1UF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDUCTION1]]
+; VF1UF4-NEXT:    store i32 13, ptr [[TMP5]], align 1
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE8]]
 ; VF1UF4:       pred.store.continue5:
 ; VF1UF4-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
 ; VF1UF4:       pred.store.if6:
 ; VF1UF4-NEXT:    [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2
-; VF1UF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]]
-; VF1UF4-NEXT:    store i32 13, i32* [[TMP6]], align 1
+; VF1UF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDUCTION2]]
+; VF1UF4-NEXT:    store i32 13, ptr [[TMP6]], align 1
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE10]]
 ; VF1UF4:       pred.store.continue7:
 ; VF1UF4-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
 ; VF1UF4:       pred.store.if8:
 ; VF1UF4-NEXT:    [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3
-; VF1UF4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]]
-; VF1UF4-NEXT:    store i32 13, i32* [[TMP7]], align 1
+; VF1UF4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDUCTION3]]
+; VF1UF4-NEXT:    store i32 13, ptr [[TMP7]], align 1
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE12]]
 ; VF1UF4:       pred.store.continue9:
 ; VF1UF4-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
@@ -186,8 +186,8 @@ define void @pr45679(i32* %A) optsize {
 ; VF1UF4-NEXT:    br label [[LOOP:%.*]]
 ; VF1UF4:       loop:
 ; VF1UF4-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
-; VF1UF4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]]
-; VF1UF4-NEXT:    store i32 13, i32* [[ARRAYIDX]], align 1
+; VF1UF4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]]
+; VF1UF4-NEXT:    store i32 13, ptr [[ARRAYIDX]], align 1
 ; VF1UF4-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
 ; VF1UF4-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
 ; VF1UF4-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -199,8 +199,8 @@ entry:
 
 loop:
   %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %riv
-  store i32 13, i32* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %riv
+  store i32 13, ptr %arrayidx, align 1
   %rivPlus1 = add nuw nsw i32 %riv, 1
   %cond = icmp eq i32 %rivPlus1, 14
   br i1 %cond, label %exit, label %loop
@@ -209,7 +209,7 @@ exit:
   ret void
 }
 
-define void @load_variant(i64* noalias %a, i64* noalias %b) {
+define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK-LABEL: @load_variant(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -223,9 +223,9 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8
-; CHECK-NEXT:    store i64 [[TMP4]], i64* [[B:%.*]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
+; CHECK-NEXT:    store i64 [[TMP4]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP5:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_STORE_IF]] ]
@@ -233,9 +233,9 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
 ; CHECK:       pred.store.if1:
 ; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
-; CHECK-NEXT:    store i64 [[TMP9]], i64* [[B]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
+; CHECK-NEXT:    store i64 [[TMP9]], ptr [[B]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.continue2:
 ; CHECK-NEXT:    [[TMP10:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP9]], [[PRED_STORE_IF1]] ]
@@ -243,9 +243,9 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; CHECK:       pred.store.if3:
 ; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8
-; CHECK-NEXT:    store i64 [[TMP14]], i64* [[B]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
+; CHECK-NEXT:    store i64 [[TMP14]], ptr [[B]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; CHECK:       pred.store.continue4:
 ; CHECK-NEXT:    [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP14]], [[PRED_STORE_IF3]] ]
@@ -253,9 +253,9 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.if5:
 ; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = load i64, i64* [[TMP18]], align 8
-; CHECK-NEXT:    store i64 [[TMP19]], i64* [[B]], align 8
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[TMP18]], align 8
+; CHECK-NEXT:    store i64 [[TMP19]], ptr [[B]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[TMP20:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP19]], [[PRED_STORE_IF5]] ]
@@ -270,9 +270,9 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]]
-; CHECK-NEXT:    [[V:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
-; CHECK-NEXT:    store i64 [[V]], i64* [[B]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 8
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -294,9 +294,9 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; VF2UF2-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF2UF2:       pred.store.if:
 ; VF2UF2-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP3]]
-; VF2UF2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-; VF2UF2-NEXT:    store i64 [[TMP5]], i64* [[B:%.*]], align 8
+; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
+; VF2UF2-NEXT:    [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
+; VF2UF2-NEXT:    store i64 [[TMP5]], ptr [[B:%.*]], align 8
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VF2UF2:       pred.store.continue:
 ; VF2UF2-NEXT:    [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_STORE_IF]] ]
@@ -304,9 +304,9 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; VF2UF2-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
 ; VF2UF2:       pred.store.if2:
 ; VF2UF2-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
-; VF2UF2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP8]]
-; VF2UF2-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP9]], align 8
-; VF2UF2-NEXT:    store i64 [[TMP10]], i64* [[B]], align 8
+; VF2UF2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
+; VF2UF2-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
+; VF2UF2-NEXT:    store i64 [[TMP10]], ptr [[B]], align 8
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; VF2UF2:       pred.store.continue3:
 ; VF2UF2-NEXT:    [[TMP11:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP10]], [[PRED_STORE_IF2]] ]
@@ -314,9 +314,9 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; VF2UF2-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
 ; VF2UF2:       pred.store.if4:
 ; VF2UF2-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 2
-; VF2UF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP13]]
-; VF2UF2-NEXT:    [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
-; VF2UF2-NEXT:    store i64 [[TMP15]], i64* [[B]], align 8
+; VF2UF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
+; VF2UF2-NEXT:    [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8
+; VF2UF2-NEXT:    store i64 [[TMP15]], ptr [[B]], align 8
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
 ; VF2UF2:       pred.store.continue5:
 ; VF2UF2-NEXT:    [[TMP16:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE3]] ], [ [[TMP15]], [[PRED_STORE_IF4]] ]
@@ -324,9 +324,9 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; VF2UF2-NEXT:    br i1 [[TMP17]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
 ; VF2UF2:       pred.store.if6:
 ; VF2UF2-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 3
-; VF2UF2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP18]]
-; VF2UF2-NEXT:    [[TMP20:%.*]] = load i64, i64* [[TMP19]], align 8
-; VF2UF2-NEXT:    store i64 [[TMP20]], i64* [[B]], align 8
+; VF2UF2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP18]]
+; VF2UF2-NEXT:    [[TMP20:%.*]] = load i64, ptr [[TMP19]], align 8
+; VF2UF2-NEXT:    store i64 [[TMP20]], ptr [[B]], align 8
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
 ; VF2UF2:       pred.store.continue7:
 ; VF2UF2-NEXT:    [[TMP21:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE5]] ], [ [[TMP20]], [[PRED_STORE_IF6]] ]
@@ -341,9 +341,9 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; VF2UF2-NEXT:    br label [[FOR_BODY:%.*]]
 ; VF2UF2:       for.body:
 ; VF2UF2-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; VF2UF2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]]
-; VF2UF2-NEXT:    [[V:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
-; VF2UF2-NEXT:    store i64 [[V]], i64* [[B]], align 8
+; VF2UF2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; VF2UF2-NEXT:    [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; VF2UF2-NEXT:    store i64 [[V]], ptr [[B]], align 8
 ; VF2UF2-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; VF2UF2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
 ; VF2UF2-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -368,36 +368,36 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; VF1UF4-NEXT:    br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF1UF4:       pred.store.if:
 ; VF1UF4-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
-; VF1UF4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDUCTION]]
-; VF1UF4-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-; VF1UF4-NEXT:    store i64 [[TMP5]], i64* [[B:%.*]], align 8
+; VF1UF4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDUCTION]]
+; VF1UF4-NEXT:    [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
+; VF1UF4-NEXT:    store i64 [[TMP5]], ptr [[B:%.*]], align 8
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VF1UF4:       pred.store.continue:
 ; VF1UF4-NEXT:    [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_STORE_IF]] ]
 ; VF1UF4-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
 ; VF1UF4:       pred.store.if4:
 ; VF1UF4-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
-; VF1UF4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION1]]
-; VF1UF4-NEXT:    [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8
-; VF1UF4-NEXT:    store i64 [[TMP8]], i64* [[B]], align 8
+; VF1UF4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDUCTION1]]
+; VF1UF4-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
+; VF1UF4-NEXT:    store i64 [[TMP8]], ptr [[B]], align 8
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE8]]
 ; VF1UF4:       pred.store.continue5:
 ; VF1UF4-NEXT:    [[TMP9:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP8]], [[PRED_STORE_IF7]] ]
 ; VF1UF4-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
 ; VF1UF4:       pred.store.if6:
 ; VF1UF4-NEXT:    [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2
-; VF1UF4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION2]]
-; VF1UF4-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8
-; VF1UF4-NEXT:    store i64 [[TMP11]], i64* [[B]], align 8
+; VF1UF4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDUCTION2]]
+; VF1UF4-NEXT:    [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8
+; VF1UF4-NEXT:    store i64 [[TMP11]], ptr [[B]], align 8
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE10]]
 ; VF1UF4:       pred.store.continue7:
 ; VF1UF4-NEXT:    [[TMP12:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE8]] ], [ [[TMP11]], [[PRED_STORE_IF9]] ]
 ; VF1UF4-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
 ; VF1UF4:       pred.store.if8:
 ; VF1UF4-NEXT:    [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3
-; VF1UF4-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION3]]
-; VF1UF4-NEXT:    [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8
-; VF1UF4-NEXT:    store i64 [[TMP14]], i64* [[B]], align 8
+; VF1UF4-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDUCTION3]]
+; VF1UF4-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
+; VF1UF4-NEXT:    store i64 [[TMP14]], ptr [[B]], align 8
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE12]]
 ; VF1UF4:       pred.store.continue9:
 ; VF1UF4-NEXT:    [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE10]] ], [ [[TMP14]], [[PRED_STORE_IF11]] ]
@@ -411,9 +411,9 @@ define void @load_variant(i64* noalias %a, i64* noalias %b) {
 ; VF1UF4-NEXT:    br label [[FOR_BODY:%.*]]
 ; VF1UF4:       for.body:
 ; VF1UF4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; VF1UF4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]]
-; VF1UF4-NEXT:    [[V:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
-; VF1UF4-NEXT:    store i64 [[V]], i64* [[B]], align 8
+; VF1UF4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; VF1UF4-NEXT:    [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; VF1UF4-NEXT:    store i64 [[V]], ptr [[B]], align 8
 ; VF1UF4-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; VF1UF4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
 ; VF1UF4-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -425,9 +425,9 @@ entry:
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv
-  %v = load i64, i64* %arrayidx
-  store i64 %v, i64* %b
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
+  %v = load i64, ptr %arrayidx
+  store i64 %v, ptr %b
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 14
   br i1 %exitcond.not, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
index 945de0696c70e..ea3de4a0fbb36 100644
--- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
@@ -6,7 +6,7 @@
 ; `udiv i64 %y, %add` when expanding SCEV expressions. Make sure we pick %div,
 ; which dominates the vector loop.
 
-define void @test(i16 %x, i64 %y, i32* %ptr) {
+define void @test(i16 %x, i64 %y, ptr %ptr) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV19:%.*]] = sext i16 [[X:%.*]] to i64
@@ -27,7 +27,7 @@ define void @test(i16 %x, i64 %y, i32* %ptr) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    store i32 0, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -38,7 +38,7 @@ define void @test(i16 %x, i64 %y, i32* %ptr) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    store i32 0, i32* [[PTR]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[PTR]], align 4
 ; CHECK-NEXT:    [[V2:%.*]] = trunc i64 [[IV]] to i8
 ; CHECK-NEXT:    [[V3:%.*]] = add i8 [[V2]], 1
 ; CHECK-NEXT:    [[CMP15:%.*]] = icmp slt i8 [[V3]], 5
@@ -72,7 +72,7 @@ loop.preheader:
 
 loop:
   %iv = phi i64 [ %iv.next, %loop ], [ 0, %loop.preheader ]
-  store i32 0, i32* %ptr, align 4
+  store i32 0, ptr %ptr, align 4
   %v2 = trunc i64 %iv to i8
   %v3 = add i8 %v2, 1
   %cmp15 = icmp slt i8 %v3, 5

diff  --git a/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll b/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll
index dde445a7c4065..461e3a0e0278b 100644
--- a/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll
@@ -5,7 +5,7 @@
 ; expanding the memory runtime checks.
 
 @f.e = external global i32, align 1
-@d = external global i8*, align 1
+@d = external global ptr, align 1
 
 declare i1 @cond()
 
@@ -14,7 +14,7 @@ define void @f() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
 ; CHECK:       outer.header:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** @d, align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr @d, align 1
 ; CHECK-NEXT:    [[C_0:%.*]] = call i1 @cond()
 ; CHECK-NEXT:    br i1 [[C_0]], label [[OUTER_EXIT_0:%.*]], label [[INNER_1_HEADER_PREHEADER:%.*]]
 ; CHECK:       inner.1.header.preheader:
@@ -28,26 +28,26 @@ define void @f() {
 ; CHECK:       outer.latch:
 ; CHECK-NEXT:    br label [[OUTER_HEADER]]
 ; CHECK:       outer.exit.0:
-; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i8* [ [[TMP0]], [[OUTER_HEADER]] ]
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi ptr [ [[TMP0]], [[OUTER_HEADER]] ]
 ; CHECK-NEXT:    br label [[LOOP_PREHEADER:%.*]]
 ; CHECK:       outer.exit.1:
-; CHECK-NEXT:    [[DOTLCSSA1:%.*]] = phi i8* [ [[TMP0]], [[INNER_1_LATCH]] ]
+; CHECK-NEXT:    [[DOTLCSSA1:%.*]] = phi ptr [ [[TMP0]], [[INNER_1_LATCH]] ]
 ; CHECK-NEXT:    br label [[LOOP_PREHEADER]]
 ; CHECK:       loop.preheader:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i8* [ [[DOTLCSSA]], [[OUTER_EXIT_0]] ], [ [[DOTLCSSA1]], [[OUTER_EXIT_1]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi ptr [ [[DOTLCSSA]], [[OUTER_EXIT_0]] ], [ [[DOTLCSSA1]], [[OUTER_EXIT_1]] ]
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK:       vector.memcheck:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* [[TMP1]], i64 1
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* bitcast (i32* @f.e to i8*), [[SCEVGEP]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TMP1]], bitcast (i32* getelementptr inbounds (i32, i32* @f.e, i64 1) to i8*)
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP1]], i64 1
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr @f.e, [[SCEVGEP]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[TMP1]], getelementptr inbounds (i32, ptr @f.e, i64 1)
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    store i32 0, i32* @f.e, align 1, !alias.scope !0, !noalias !3
-; CHECK-NEXT:    store i8 10, i8* [[TMP0]], align 1
+; CHECK-NEXT:    store i32 0, ptr @f.e, align 1, !alias.scope !0, !noalias !3
+; CHECK-NEXT:    store i8 10, ptr [[TMP0]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 500
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -60,8 +60,8 @@ define void @f() {
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[CONV6_US_US_US:%.*]] = zext i1 false to i32
-; CHECK-NEXT:    store i32 [[CONV6_US_US_US]], i32* @f.e, align 1
-; CHECK-NEXT:    store i8 10, i8* [[TMP1]], align 1
+; CHECK-NEXT:    store i32 [[CONV6_US_US_US]], ptr @f.e, align 1
+; CHECK-NEXT:    store i8 10, ptr [[TMP1]], align 1
 ; CHECK-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 500
 ; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -72,7 +72,7 @@ entry:
   br label %outer.header
 
 outer.header:                            ; preds = %cleanup, %entry
-  %0 = load i8*, i8** @d, align 1
+  %0 = load ptr, ptr @d, align 1
   %c.0 = call i1 @cond()
   br i1 %c.0, label %outer.exit.0, label %inner.1.header
 
@@ -97,8 +97,8 @@ outer.exit.1:                                         ; preds = %if.end, %if.end
 loop:                                  ; preds = %if.end.us.us.us, %for.body3.lr.ph.outer
   %iv = phi i32 [ %iv.next, %loop ], [ 0, %outer.exit.0 ], [ 0, %outer.exit.1 ]
   %conv6.us.us.us = zext i1 false to i32
-  store i32 %conv6.us.us.us, i32* @f.e, align 1
-  store i8 10, i8* %0, align 1
+  store i32 %conv6.us.us.us, ptr @f.e, align 1
+  store i8 10, ptr %0, align 1
   %iv.next = add nsw i32 %iv, 1
   %ec = icmp eq i32 %iv.next, 500
   br i1 %ec, label %exit, label %loop

diff  --git a/llvm/test/Transforms/LoopVectorize/pr48832.ll b/llvm/test/Transforms/LoopVectorize/pr48832.ll
index 64513a5a5e454..b89be885a5042 100644
--- a/llvm/test/Transforms/LoopVectorize/pr48832.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr48832.ll
@@ -30,8 +30,8 @@ cond.false:                                       ; preds = %for.body, %land.rhs
 
 cond.end:                                         ; preds = %land.rhs, %cond.false
   %cond = phi i32 [ 0, %cond.false ], [ 1, %land.rhs ]
-  %arrayidx = getelementptr inbounds %arrayt, %arrayt* @v_146, i16 0, i16 %storemerge
-  store i32 %cond, i32* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds %arrayt, ptr @v_146, i16 0, i16 %storemerge
+  store i32 %cond, ptr %arrayidx, align 1
   %inc = add nsw i16 %storemerge, 1
   br label %for.cond
 

diff  --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
index 43451a862ea9f..1e6a94eb2b2ac 100644
--- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
@@ -28,11 +28,11 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize {
 ; CHECK:       pred.load.if:
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i16 [[TMP3]], -1
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [40 x [4 x i16]], [40 x [4 x i16]]* @A, i16 0, i16 [[TMP4]], i16 0
-; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [40 x [4 x i16]], ptr @A, i16 0, i16 [[TMP4]], i16 0
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [40 x [4 x i16]], [40 x [4 x i16]]* @A, i16 0, i16 [[TMP4]], i16 3
-; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[TMP8]], align 1
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [40 x [4 x i16]], ptr @A, i16 0, i16 [[TMP4]], i16 3
+; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i16> poison, i16 [[TMP9]], i32 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -43,11 +43,11 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize {
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i16 [[OFFSET_IDX]], -1
 ; CHECK-NEXT:    [[TMP15:%.*]] = add nsw i16 [[TMP14]], -1
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [40 x [4 x i16]], [40 x [4 x i16]]* @A, i16 0, i16 [[TMP15]], i16 0
-; CHECK-NEXT:    [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 1
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [40 x [4 x i16]], ptr @A, i16 0, i16 [[TMP15]], i16 0
+; CHECK-NEXT:    [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 1
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x i16> [[TMP11]], i16 [[TMP17]], i32 1
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [40 x [4 x i16]], [40 x [4 x i16]]* @A, i16 0, i16 [[TMP15]], i16 3
-; CHECK-NEXT:    [[TMP20:%.*]] = load i16, i16* [[TMP19]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [40 x [4 x i16]], ptr @A, i16 0, i16 [[TMP15]], i16 3
+; CHECK-NEXT:    [[TMP20:%.*]] = load i16, ptr [[TMP19]], align 1
 ; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <2 x i16> [[TMP12]], i16 [[TMP20]], i32 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue2:
@@ -70,10 +70,10 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize {
 ; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IVMINUS1:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[SUM:%.*]] = phi i16 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[PREVSUM:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[IVMINUS1]] = add nsw i16 [[IV]], -1
-; CHECK-NEXT:    [[GEPA0:%.*]] = getelementptr inbounds [40 x [4 x i16]], [40 x [4 x i16]]* @A, i16 0, i16 [[IVMINUS1]], i16 0
-; CHECK-NEXT:    [[TMP29:%.*]] = load i16, i16* [[GEPA0]], align 1
-; CHECK-NEXT:    [[GEPA3:%.*]] = getelementptr inbounds [40 x [4 x i16]], [40 x [4 x i16]]* @A, i16 0, i16 [[IVMINUS1]], i16 3
-; CHECK-NEXT:    [[TMP30:%.*]] = load i16, i16* [[GEPA3]], align 1
+; CHECK-NEXT:    [[GEPA0:%.*]] = getelementptr inbounds [40 x [4 x i16]], ptr @A, i16 0, i16 [[IVMINUS1]], i16 0
+; CHECK-NEXT:    [[TMP29:%.*]] = load i16, ptr [[GEPA0]], align 1
+; CHECK-NEXT:    [[GEPA3:%.*]] = getelementptr inbounds [40 x [4 x i16]], ptr @A, i16 0, i16 [[IVMINUS1]], i16 3
+; CHECK-NEXT:    [[TMP30:%.*]] = load i16, ptr [[GEPA3]], align 1
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i16 [[TMP29]], [[TMP30]]
 ; CHECK-NEXT:    [[PREVSUM]] = add nsw i16 [[SUM]], [[ADD]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[IV]], 1
@@ -89,10 +89,10 @@ loop:
   %iv = phi i16 [ 41, %entry ], [ %ivMinus1, %loop ]
   %sum = phi i16 [ 0, %entry ], [ %prevSum, %loop ]
   %ivMinus1 = add nsw i16 %iv, -1
-  %gepA0 = getelementptr inbounds [40 x [4 x i16]], [40 x [4 x i16]]* @A, i16 0, i16 %ivMinus1, i16 0
-  %0 = load i16, i16* %gepA0, align 1
-  %gepA3 = getelementptr inbounds [40 x [4 x i16]], [40 x [4 x i16]]* @A, i16 0, i16 %ivMinus1, i16 3
-  %1 = load i16, i16* %gepA3, align 1
+  %gepA0 = getelementptr inbounds [40 x [4 x i16]], ptr @A, i16 0, i16 %ivMinus1, i16 0
+  %0 = load i16, ptr %gepA0, align 1
+  %gepA3 = getelementptr inbounds [40 x [4 x i16]], ptr @A, i16 0, i16 %ivMinus1, i16 3
+  %1 = load i16, ptr %gepA3, align 1
   %add = add nsw i16 %0, %1
   %prevSum = add nsw i16 %sum, %add
   %cmp = icmp ugt i16 %iv, 1

diff  --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
index 9d01797cda210..8cd15daa83e6c 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
@@ -9,16 +9,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; DEBUGLOC-LABEL: define void @_Z3fooPf(
 ; Check that the phi to resume the scalar part of the loop
 ; has Debug Location.
-define void @_Z3fooPf(float* %a) {
+define void @_Z3fooPf(ptr %a) {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %p = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %p = load float, ptr %arrayidx, align 4
   %mul = fmul float %p, 2.000000e+00
-  store float %mul, float* %arrayidx, align 4
+  store float %mul, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1024
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0

diff  --git a/llvm/test/Transforms/LoopVectorize/ptr-induction.ll b/llvm/test/Transforms/LoopVectorize/ptr-induction.ll
index e05e3e25942f5..f2637991c020c 100644
--- a/llvm/test/Transforms/LoopVectorize/ptr-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/ptr-induction.ll
@@ -7,21 +7,21 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 ; CHECK: @f
 ; Expect that the pointer indvar has been converted into an integer indvar.
 ; CHECK: %index.next = add nuw i64 %index, 4
-define i32 @f(i32* readonly %a, i32* readnone %b) #0 {
+define i32 @f(ptr readonly %a, ptr readnone %b) #0 {
 entry:
-  %cmp.6 = icmp ult i32* %a, %b
+  %cmp.6 = icmp ult ptr %a, %b
   br i1 %cmp.6, label %while.body.preheader, label %while.end
 
 while.body.preheader:                             ; preds = %entry
   br label %while.body
 
 while.body:                                       ; preds = %while.body.preheader, %while.body
-  %a.pn = phi i32* [ %incdec.ptr8, %while.body ], [ %a, %while.body.preheader ]
+  %a.pn = phi ptr [ %incdec.ptr8, %while.body ], [ %a, %while.body.preheader ]
   %acc.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
-  %incdec.ptr8 = getelementptr inbounds i32, i32* %a.pn, i64 1
-  %0 = load i32, i32* %incdec.ptr8, align 1
+  %incdec.ptr8 = getelementptr inbounds i32, ptr %a.pn, i64 1
+  %0 = load i32, ptr %incdec.ptr8, align 1
   %add = add nuw nsw i32 %0, %acc.07
-  %exitcond = icmp eq i32* %incdec.ptr8, %b
+  %exitcond = icmp eq ptr %incdec.ptr8, %b
   br i1 %exitcond, label %while.cond.while.end_crit_edge, label %while.body
 
 while.cond.while.end_crit_edge:                   ; preds = %while.body

diff  --git a/llvm/test/Transforms/LoopVectorize/ptr_loops.ll b/llvm/test/Transforms/LoopVectorize/ptr_loops.ll
index 6520ef444cc1a..ec0f8456c91b6 100644
--- a/llvm/test/Transforms/LoopVectorize/ptr_loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/ptr_loops.ll
@@ -14,14 +14,14 @@ define i32 @_Z5test1v() nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %0, %1
-  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @A, i64 0, i64 18), %0 ], [ %4, %1 ]
-  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @B, i64 0, i64 0), %0 ], [ %5, %1 ]
-  %2 = load i32, i32* %b.01, align 4
+  %p.02 = phi ptr [ getelementptr inbounds ([36 x i32], ptr @A, i64 0, i64 18), %0 ], [ %4, %1 ]
+  %b.01 = phi ptr [ @B, %0 ], [ %5, %1 ]
+  %2 = load i32, ptr %b.01, align 4
   %3 = shl nsw i32 %2, 1
-  store i32 %3, i32* %p.02, align 4
-  %4 = getelementptr inbounds i32, i32* %p.02, i64 -1
-  %5 = getelementptr inbounds i32, i32* %b.01, i64 1
-  %6 = icmp eq i32* %4, getelementptr ([36 x i32], [36 x i32]* @A, i64 128102389400760775, i64 3)
+  store i32 %3, ptr %p.02, align 4
+  %4 = getelementptr inbounds i32, ptr %p.02, i64 -1
+  %5 = getelementptr inbounds i32, ptr %b.01, i64 1
+  %6 = icmp eq ptr %4, getelementptr ([36 x i32], ptr @A, i64 128102389400760775, i64 3)
   br i1 %6, label %7, label %1
 
 ; <label>:7                                       ; preds = %1
@@ -37,13 +37,13 @@ define i32 @_Z5test2v() nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %0, %1
-  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @A, i64 0, i64 25), %0 ], [ %3, %1 ]
-  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @B, i64 0, i64 2), %0 ], [ %4, %1 ]
-  %2 = load i32, i32* %b.01, align 4
-  store i32 %2, i32* %p.02, align 4
-  %3 = getelementptr inbounds i32, i32* %p.02, i64 -1
-  %4 = getelementptr inbounds i32, i32* %b.01, i64 1
-  %5 = icmp eq i32* %4, getelementptr inbounds ([36 x i32], [36 x i32]* @A, i64 0, i64 18)
+  %p.02 = phi ptr [ getelementptr inbounds ([36 x i32], ptr @A, i64 0, i64 25), %0 ], [ %3, %1 ]
+  %b.01 = phi ptr [ getelementptr inbounds ([36 x i32], ptr @B, i64 0, i64 2), %0 ], [ %4, %1 ]
+  %2 = load i32, ptr %b.01, align 4
+  store i32 %2, ptr %p.02, align 4
+  %3 = getelementptr inbounds i32, ptr %p.02, i64 -1
+  %4 = getelementptr inbounds i32, ptr %b.01, i64 1
+  %5 = icmp eq ptr %4, getelementptr inbounds ([36 x i32], ptr @A, i64 0, i64 18)
   br i1 %5, label %6, label %1
 
 ; <label>:6                                       ; preds = %1
@@ -59,13 +59,13 @@ define i32 @_Z5test3v() nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %0, %1
-  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @A, i64 0, i64 29), %0 ], [ %3, %1 ]
-  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @B, i64 0, i64 5), %0 ], [ %4, %1 ]
-  %2 = load i32, i32* %b.01, align 4
-  store i32 %2, i32* %p.02, align 4
-  %3 = getelementptr inbounds i32, i32* %p.02, i64 -1
-  %4 = getelementptr inbounds i32, i32* %b.01, i64 1
-  %5 = icmp eq i32* %3, getelementptr ([36 x i32], [36 x i32]* @A, i64 128102389400760775, i64 3)
+  %p.02 = phi ptr [ getelementptr inbounds ([36 x i32], ptr @A, i64 0, i64 29), %0 ], [ %3, %1 ]
+  %b.01 = phi ptr [ getelementptr inbounds ([36 x i32], ptr @B, i64 0, i64 5), %0 ], [ %4, %1 ]
+  %2 = load i32, ptr %b.01, align 4
+  store i32 %2, ptr %p.02, align 4
+  %3 = getelementptr inbounds i32, ptr %p.02, i64 -1
+  %4 = getelementptr inbounds i32, ptr %b.01, i64 1
+  %5 = icmp eq ptr %3, getelementptr ([36 x i32], ptr @A, i64 128102389400760775, i64 3)
   br i1 %5, label %6, label %1
 
 ; <label>:6                                       ; preds = %1

diff  --git a/llvm/test/Transforms/LoopVectorize/read-only.ll b/llvm/test/Transforms/LoopVectorize/read-only.ll
index 06f0d0a4a2da8..603ba4da034f6 100644
--- a/llvm/test/Transforms/LoopVectorize/read-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/read-only.ll
@@ -5,18 +5,18 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK-LABEL: @read_only_func(
 ;CHECK: load <4 x i32>
 ;CHECK: ret i32
-define i32 @read_only_func(i32* nocapture %A, i32* nocapture %B, i32 %n) nounwind uwtable readonly ssp {
+define i32 @read_only_func(ptr nocapture %A, ptr nocapture %B, i32 %n) nounwind uwtable readonly ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
   %4 = add nsw i64 %indvars.iv, 13
-  %5 = getelementptr inbounds i32, i32* %B, i64 %4
-  %6 = load i32, i32* %5, align 4
+  %5 = getelementptr inbounds i32, ptr %B, i64 %4
+  %6 = load i32, ptr %5, align 4
   %7 = shl i32 %6, 1
   %8 = add i32 %3, %sum.02
   %9 = add i32 %8, %7
@@ -35,18 +35,18 @@ define i32 @read_only_func(i32* nocapture %A, i32* nocapture %B, i32 %n) nounwin
 ;CHECK-LABEL: @read_only_func_volatile(
 ;CHECK-NOT: load <4 x i32>
 ;CHECK: ret i32
-define i32 @read_only_func_volatile(i32* nocapture %A, i32* nocapture %B, i32 %n) nounwind uwtable readonly ssp {
+define i32 @read_only_func_volatile(ptr nocapture %A, ptr nocapture %B, i32 %n) nounwind uwtable readonly ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load volatile i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load volatile i32, ptr %2, align 4
   %4 = add nsw i64 %indvars.iv, 13
-  %5 = getelementptr inbounds i32, i32* %B, i64 %4
-  %6 = load i32, i32* %5, align 4
+  %5 = getelementptr inbounds i32, ptr %B, i64 %4
+  %6 = load i32, ptr %5, align 4
   %7 = shl i32 %6, 1
   %8 = add i32 %3, %sum.02
   %9 = add i32 %8, %7

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
index 36acea0e02c89..34767500dfd85 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s
 
-define float @cond_fadd(float* noalias nocapture readonly %a, float* noalias nocapture readonly %cond, i64 %N){
+define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %cond, i64 %N){
 ; CHECK-LABEL: @cond_fadd(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
@@ -11,15 +11,14 @@ define float @cond_fadd(float* noalias nocapture readonly %a, float* noalias noc
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, float* [[COND:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -28,8 +27,8 @@ define float @cond_fadd(float* noalias nocapture readonly %a, float* noalias noc
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP11]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -38,8 +37,8 @@ define float @cond_fadd(float* noalias nocapture readonly %a, float* noalias noc
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP15:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = load float, float* [[TMP16]], align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP17]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -48,8 +47,8 @@ define float @cond_fadd(float* noalias nocapture readonly %a, float* noalias noc
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP23]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -69,13 +68,13 @@ define float @cond_fadd(float* noalias nocapture readonly %a, float* noalias noc
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
 ; CHECK-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[COND]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP29:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP29:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP29]], 5.000000e+00
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP30:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP30:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[FADD:%.*]] = fadd fast float [[RDX]], [[TMP30]]
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
@@ -93,14 +92,14 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
   %rdx = phi float [ 1.000000e+00, %entry ], [ %res, %for.inc ]
-  %arrayidx = getelementptr inbounds float, float* %cond, i64 %iv
-  %0 = load float, float* %arrayidx
+  %arrayidx = getelementptr inbounds float, ptr %cond, i64 %iv
+  %0 = load float, ptr %arrayidx
   %tobool = fcmp une float %0, 5.000000e+00
   br i1 %tobool, label %if.then, label %for.inc
 
 if.then:
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %iv
-  %1 = load float, float* %arrayidx2
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %iv
+  %1 = load float, ptr %arrayidx2
   %fadd = fadd fast float %rdx, %1
   br label %for.inc
 
@@ -114,7 +113,7 @@ for.end:
   ret float %res
 }
 
-define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) {
+define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) {
 ; CHECK-LABEL: @cond_cmp_sel(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
@@ -125,15 +124,14 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, float* [[COND:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -142,8 +140,8 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP11]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -152,8 +150,8 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP15:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = load float, float* [[TMP16]], align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP17]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -162,8 +160,8 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP23]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -184,13 +182,13 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
 ; CHECK-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[COND]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP30:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP30:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP30]], 3.000000e+00
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP31:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP31:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[TMP32:%.*]] = call fast float @llvm.minnum.f32(float [[RDX]], float [[TMP31]])
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
@@ -208,14 +206,14 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
   %rdx = phi float [ 1.000000e+00, %entry ], [ %res, %for.inc ]
-  %arrayidx = getelementptr inbounds float, float* %cond, i64 %iv
-  %0 = load float, float* %arrayidx
+  %arrayidx = getelementptr inbounds float, ptr %cond, i64 %iv
+  %0 = load float, ptr %arrayidx
   %tobool = fcmp une float %0, 3.000000e+00
   br i1 %tobool, label %if.then, label %for.inc
 
 if.then:
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %iv
-  %1 = load float, float* %arrayidx2
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %iv
+  %1 = load float, ptr %arrayidx2
   %fcmp = fcmp fast olt float %rdx, %1
   %fsel = select fast i1 %fcmp, float %rdx, float %1
   br label %for.inc
@@ -230,7 +228,7 @@ for.end:
   ret float %res
 }
 
-define i32 @conditional_and(i32* noalias %A, i32* noalias %B, i32 %cond, i64 noundef %N) #0 {
+define i32 @conditional_and(ptr noalias %A, ptr noalias %B, i32 %cond, i64 noundef %N) #0 {
 ; CHECK-LABEL: @conditional_and(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
@@ -243,15 +241,14 @@ define i32 @conditional_and(i32* noalias %A, i32* noalias %B, i32 %cond, i64 nou
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 7, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -260,8 +257,8 @@ define i32 @conditional_and(i32* noalias %A, i32* noalias %B, i32 %cond, i64 nou
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -270,8 +267,8 @@ define i32 @conditional_and(i32* noalias %A, i32* noalias %B, i32 %cond, i64 nou
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP15:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -280,8 +277,8 @@ define i32 @conditional_and(i32* noalias %A, i32* noalias %B, i32 %cond, i64 nou
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -302,13 +299,13 @@ define i32 @conditional_and(i32* noalias %A, i32* noalias %B, i32 %cond, i64 nou
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
 ; CHECK-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP30]], [[COND]]
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[TMP31]], [[RDX]]
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
@@ -326,14 +323,14 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
   %rdx = phi i32 [ 7, %entry ], [ %res, %for.inc ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %iv
-  %0 = load i32, i32* %arrayidx
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %iv
+  %0 = load i32, ptr %arrayidx
   %tobool = icmp eq i32 %0, %cond
   br i1 %tobool, label %if.then, label %for.inc
 
 if.then:
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %iv
-  %1 = load i32, i32* %arrayidx2
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %iv
+  %1 = load i32, ptr %arrayidx2
   %and = and i32 %1, %rdx
   br label %for.inc
 
@@ -347,7 +344,7 @@ for.end:
   ret i32 %res
 }
 
-define i32 @simple_chained_rdx(i32* noalias %a, i32* noalias %b, i32* noalias %cond, i64 noundef %N) {
+define i32 @simple_chained_rdx(ptr noalias %a, ptr noalias %b, ptr noalias %cond, i64 noundef %N) {
 ; CHECK-LABEL: @simple_chained_rdx(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
@@ -361,15 +358,14 @@ define i32 @simple_chained_rdx(i32* noalias %a, i32* noalias %b, i32* noalias %c
 ; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -377,8 +373,8 @@ define i32 @simple_chained_rdx(i32* noalias %a, i32* noalias %b, i32* noalias %c
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP13]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -386,8 +382,8 @@ define i32 @simple_chained_rdx(i32* noalias %a, i32* noalias %b, i32* noalias %c
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP18]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -395,8 +391,8 @@ define i32 @simple_chained_rdx(i32* noalias %a, i32* noalias %b, i32* noalias %c
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3
 ; CHECK-NEXT:    br i1 [[TMP21]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; CHECK:       pred.load.if5:
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP23]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -407,8 +403,8 @@ define i32 @simple_chained_rdx(i32* noalias %a, i32* noalias %b, i32* noalias %c
 ; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; CHECK:       pred.load.if7:
-; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
 ; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> poison, i32 [[TMP31]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; CHECK:       pred.load.continue8:
@@ -416,8 +412,8 @@ define i32 @simple_chained_rdx(i32* noalias %a, i32* noalias %b, i32* noalias %c
 ; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1
 ; CHECK-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; CHECK:       pred.load.if9:
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
 ; CHECK:       pred.load.continue10:
@@ -425,8 +421,8 @@ define i32 @simple_chained_rdx(i32* noalias %a, i32* noalias %b, i32* noalias %c
 ; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2
 ; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; CHECK:       pred.load.if11:
-; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
+; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
 ; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
 ; CHECK:       pred.load.continue12:
@@ -434,8 +430,8 @@ define i32 @simple_chained_rdx(i32* noalias %a, i32* noalias %b, i32* noalias %c
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3
 ; CHECK-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
 ; CHECK:       pred.load.if13:
-; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4
+; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4
 ; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP46]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; CHECK:       pred.load.continue14:
@@ -456,16 +452,16 @@ define i32 @simple_chained_rdx(i32* noalias %a, i32* noalias %b, i32* noalias %c
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[RDX:%.*]] = phi i32 [ [[RES:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[COND]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP53:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP53:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP53]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP54:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP54:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP54]], [[RDX]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP55:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP55:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[ADD]], [[TMP55]]
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
@@ -483,17 +479,17 @@ entry:
 for.body:
   %iv = phi i64 [ %iv.next, %for.inc ], [ 0, %entry ]
   %rdx = phi i32 [ %res, %for.inc ], [ 5, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %iv
-  %0 = load i32, i32* %arrayidx
+  %arrayidx = getelementptr inbounds i32, ptr %cond, i64 %iv
+  %0 = load i32, ptr %arrayidx
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %for.inc, label %if.then
 
 if.then:
-  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %iv
-  %1 = load i32, i32* %arrayidx1
+  %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %iv
+  %1 = load i32, ptr %arrayidx1
   %add = add nsw i32 %1, %rdx
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %2 = load i32, i32* %arrayidx2
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %2 = load i32, ptr %arrayidx2
   %add3 = add nsw i32 %add, %2
   br label %for.inc
 
@@ -514,7 +510,7 @@ for.end:
 ;
 ; Reduction not performed in loop as the phi has more than two incoming values
 ;
-define i64 @nested_cond_and(i64* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, i64* noalias nocapture readonly %cond, i64 %N){
+define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %cond, i64 %N){
 ; CHECK-LABEL: @nested_cond_and(
 ; CHECK:       vector.body:
 ; CHECK-NOT:     @llvm.vector.reduce.and
@@ -527,21 +523,21 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
   %rdx = phi i64 [ 5, %entry ], [ %res, %for.inc ]
-  %arrayidx = getelementptr inbounds i64, i64* %cond, i64 %iv
-  %0 = load i64, i64* %arrayidx
+  %arrayidx = getelementptr inbounds i64, ptr %cond, i64 %iv
+  %0 = load i64, ptr %arrayidx
   %tobool = icmp eq i64 %0, 0
   br i1 %tobool, label %if.then, label %for.inc
 
 if.then:
-  %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %iv
-  %1 = load i64, i64* %arrayidx2
+  %arrayidx2 = getelementptr inbounds i64, ptr %a, i64 %iv
+  %1 = load i64, ptr %arrayidx2
   %and1 = and i64 %rdx, %1
   %tobool2 = icmp eq i64 %1, 3
   br i1 %tobool2, label %if.then.2, label %for.inc
 
 if.then.2:
-  %arrayidx3 = getelementptr inbounds i64, i64* %b, i64 %iv
-  %2 = load i64, i64* %arrayidx3
+  %arrayidx3 = getelementptr inbounds i64, ptr %b, i64 %iv
+  %2 = load i64, ptr %arrayidx3
   %and2 = and i64 %rdx, %2
   br label %for.inc
 
@@ -559,7 +555,7 @@ for.end:
 ; if they are the last in the chain, i.e. the loop exit instruction is a Phi node. Therefore we reject
 ; the Phi (%rdx1) as it has more than one use.
 ;
-define i32 @cond-uncond(i32* noalias %src1, i32* noalias %src2, i32* noalias %cond, i64 noundef %n) #0 {
+define i32 @cond-uncond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, i64 noundef %n) #0 {
 ; CHECK-LABEL: @cond-uncond(
 ; CHECK:       pred.load.continue6:
 ; CHECK-NOT:     @llvm.vector.reduce.add
@@ -572,21 +568,21 @@ entry:
 for.body:
   %rdx1 = phi i32 [ %add2, %if.end ], [ 0, %entry ]
   %iv = phi i64 [ %iv.next, %if.end ], [ 0, %entry]
-  %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %iv
-  %0 = load i32, i32* %arrayidx
+  %arrayidx = getelementptr inbounds i32, ptr %cond, i64 %iv
+  %0 = load i32, ptr %arrayidx
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %if.end, label %if.then
 
 if.then:
-  %arrayidx1 = getelementptr inbounds i32, i32* %src2, i64 %iv
-  %1 = load i32, i32* %arrayidx1
+  %arrayidx1 = getelementptr inbounds i32, ptr %src2, i64 %iv
+  %1 = load i32, ptr %arrayidx1
   %add = add nsw i32 %1, %rdx1
   br label %if.end
 
 if.end:
   %res = phi i32 [ %add, %if.then ], [ %rdx1, %for.body ]
-  %arrayidx2 = getelementptr inbounds i32, i32* %src1, i64 %iv
-  %2 = load i32, i32* %arrayidx2
+  %arrayidx2 = getelementptr inbounds i32, ptr %src1, i64 %iv
+  %2 = load i32, ptr %arrayidx2
   %add2 = add nsw i32 %2, %res
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
@@ -600,7 +596,7 @@ for.end:
 ; Chain of two conditional reductions. We do not vectorise this with in-loop reductions as neither
 ; of the incoming values of the LoopExitInstruction (%res) is the reduction Phi (%rdx1).
 ;
-define float @cond_cond(float* noalias %src1, float* noalias %src2, float* noalias %cond, i64 %n) #0 {
+define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, i64 %n) #0 {
 ; CHECK-LABEL: @cond_cond(
 ; CHECK:       pred.load.continue14:
 ; CHECK-NOT:     @llvm.vector.reduce.fadd
@@ -613,14 +609,14 @@ entry:
 for.body:
   %rdx1 = phi float [ %res, %for.inc ], [ 2.000000e+00, %entry ]
   %iv = phi i64 [ %iv.next, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %cond, i64 %iv
-  %0 = load float, float* %arrayidx
+  %arrayidx = getelementptr inbounds float, ptr %cond, i64 %iv
+  %0 = load float, ptr %arrayidx
   %cmp1 = fcmp fast oeq float %0, 3.000000e+00
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:
-  %arrayidx2 = getelementptr inbounds float, float* %src1, i64 %iv
-  %1 = load float, float* %arrayidx2
+  %arrayidx2 = getelementptr inbounds float, ptr %src1, i64 %iv
+  %1 = load float, ptr %arrayidx2
   %add = fadd fast float %1, %rdx1
   br label %if.end
 
@@ -630,8 +626,8 @@ if.end:
   br i1 %cmp5, label %if.then6, label %for.inc
 
 if.then6:
-  %arrayidx7 = getelementptr inbounds float, float* %src2, i64 %iv
-  %2 = load float, float* %arrayidx7
+  %arrayidx7 = getelementptr inbounds float, ptr %src2, i64 %iv
+  %2 = load float, ptr %arrayidx7
   %add2 = fadd fast float %2, %rdx2
   br label %for.inc
 
@@ -649,7 +645,7 @@ for.end:
 ; Chain of an unconditional & a conditional reduction. We do not vectorise this in-loop as neither of the
 ; incoming values of the LoopExitInstruction (%res) is the reduction Phi (%rdx).
 ;
-define i32 @uncond_cond(i32* noalias %src1, i32* noalias %src2, i32* noalias %cond, i64 %N) #0 {
+define i32 @uncond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, i64 %N) #0 {
 ; CHECK-LABEL: @uncond_cond(
 ; CHECK:       pred.load.continue7:
 ; CHECK-NOT:     @llvm.vector.reduce.add
@@ -662,17 +658,17 @@ entry:
 for.body:
   %rdx = phi i32 [ %res, %for.inc ], [ 0, %entry ]
   %iv = phi i64 [ %iv.next, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %src1, i64 %iv
-  %0 = load i32, i32* %arrayidx
+  %arrayidx = getelementptr inbounds i32, ptr %src1, i64 %iv
+  %0 = load i32, ptr %arrayidx
   %add1 = add nsw i32 %0, %rdx
-  %arrayidx1 = getelementptr inbounds i32, i32* %cond, i64 %iv
-  %1 = load i32, i32* %arrayidx1
+  %arrayidx1 = getelementptr inbounds i32, ptr %cond, i64 %iv
+  %1 = load i32, ptr %arrayidx1
   %tobool.not = icmp eq i32 %1, 0
   br i1 %tobool.not, label %for.inc, label %if.then
 
 if.then:
-  %arrayidx2 = getelementptr inbounds i32, i32* %src2, i64 %iv
-  %2 = load i32, i32* %arrayidx2
+  %arrayidx2 = getelementptr inbounds i32, ptr %src2, i64 %iv
+  %2 = load i32, ptr %arrayidx2
   %add2 = add nsw i32 %2, %add1
   br label %for.inc
 
@@ -690,7 +686,7 @@ for.end:
 ; Chain of multiple unconditional & conditional reductions. Does not vectorise in-loop as when we look back
 ; through the chain and check the number of uses of %add1, we find more than the expected one use.
 ;
-define i32 @uncond_cond_uncond(i32* noalias %src1, i32* noalias %src2, i32* noalias %cond, i64 noundef %N) {
+define i32 @uncond_cond_uncond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, i64 noundef %N) {
 ; CHECK-LABEL: @uncond_cond_uncond(
 ; CHECK:       pred.load.continue7:
 ; CHECK-NOT:     @llvm.vector.reduce.add
@@ -703,17 +699,17 @@ entry:
 for.body:
   %rdx = phi i32 [ %add3, %if.end ], [ 0, %entry ]
   %iv = phi i64 [ %iv.next, %if.end ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %src1, i64 %iv
-  %0 = load i32, i32* %arrayidx
+  %arrayidx = getelementptr inbounds i32, ptr %src1, i64 %iv
+  %0 = load i32, ptr %arrayidx
   %add1 = add nsw i32 %0, %rdx
-  %arrayidx1 = getelementptr inbounds i32, i32* %cond, i64 %iv
-  %1 = load i32, i32* %arrayidx1
+  %arrayidx1 = getelementptr inbounds i32, ptr %cond, i64 %iv
+  %1 = load i32, ptr %arrayidx1
   %tobool.not = icmp eq i32 %1, 0
   br i1 %tobool.not, label %if.end, label %if.then
 
 if.then:
-  %arrayidx2 = getelementptr inbounds i32, i32* %src2, i64 %iv
-  %2 = load i32, i32* %arrayidx2
+  %arrayidx2 = getelementptr inbounds i32, ptr %src2, i64 %iv
+  %2 = load i32, ptr %arrayidx2
   %add2 = add nsw i32 %2, %add1
   br label %if.end
 

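(Note on the hunks above: the conversion is mechanical. With typed pointers, a widened load in the vector body needed an explicit bitcast from i32* to the vector pointer type; with opaque pointers the load is emitted directly through the ptr value, so the bitcast line simply disappears. A minimal sketch of the before/after shape, using illustrative value names rather than any particular test:

  ; typed pointers (old form)
  %gep       = getelementptr inbounds i32, i32* %A, i64 %index
  %vec.ptr   = bitcast i32* %gep to <4 x i32>*
  %wide.load = load <4 x i32>, <4 x i32>* %vec.ptr, align 4

  ; opaque pointers (new form)
  %gep       = getelementptr inbounds i32, ptr %A, i64 %index
  %wide.load = load <4 x i32>, ptr %gep, align 4

The scalar predicated loads and the plain-IR function bodies change in the same way minus the bitcast: pointee-typed pointer annotations such as i32*, i64*, and float* are rewritten to ptr, and the CHECK lines are updated to match.)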
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
index a827298a01b21..e7f33a1a6296a 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
@@ -3,7 +3,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-define i32 @reduction_sum_single(i32* noalias nocapture %A) {
+define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_sum_single(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -17,8 +17,8 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -27,8 +27,8 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -37,8 +37,8 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -47,8 +47,8 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP19:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -76,8 +76,8 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
   %l7 = add i32 %sum.02, %l3
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -89,7 +89,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_sum(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -104,11 +104,11 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -118,11 +118,11 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -132,11 +132,11 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -146,11 +146,11 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
 ; CHECK:       pred.load.if7:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; CHECK:       pred.load.continue8:
@@ -186,10 +186,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l6 = trunc i64 %indvars.iv to i32
   %l7 = add i32 %sum.02, %l6
   %l8 = add i32 %l7, %l3
@@ -204,7 +204,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_sum_const(i32* noalias nocapture %A) {
+define i32 @reduction_sum_const(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_sum_const(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -218,8 +218,8 @@ define i32 @reduction_sum_const(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -228,8 +228,8 @@ define i32 @reduction_sum_const(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -238,8 +238,8 @@ define i32 @reduction_sum_const(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -248,8 +248,8 @@ define i32 @reduction_sum_const(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP19:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -280,8 +280,8 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
   %l7 = add i32 %sum.02, %l3
   %l9 = add i32 %l7, 3
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -294,7 +294,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_prod(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -309,11 +309,11 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -323,11 +323,11 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -337,11 +337,11 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -351,11 +351,11 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
 ; CHECK:       pred.load.if7:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; CHECK:       pred.load.continue8:
@@ -391,10 +391,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l6 = trunc i64 %indvars.iv to i32
   %l7 = mul i32 %prod.02, %l6
   %l8 = mul i32 %l7, %l3
@@ -409,7 +409,7 @@ entry:
   ret i32 %prod.0.lcssa
 }
 
-define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_mix(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -424,11 +424,11 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -438,11 +438,11 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -452,11 +452,11 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -466,11 +466,11 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
 ; CHECK:       pred.load.if7:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; CHECK:       pred.load.continue8:
@@ -504,10 +504,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l6 = mul nsw i32 %l5, %l3
   %l7 = trunc i64 %indvars.iv to i32
   %l8 = add i32 %sum.02, %l7
@@ -522,7 +522,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_mul(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -536,11 +536,11 @@ define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -550,11 +550,11 @@ define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -564,11 +564,11 @@ define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -578,11 +578,11 @@ define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -614,10 +614,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 19, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l6 = mul i32 %sum.02, %l3
   %l7 = mul i32 %l6, %l5
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -630,7 +630,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_and(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -644,11 +644,11 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -658,11 +658,11 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -672,11 +672,11 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -686,11 +686,11 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -722,10 +722,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = and i32 %result.08, %l0
   %and = and i32 %add, %l1
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -738,7 +738,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_or(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -752,11 +752,11 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -766,11 +766,11 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -780,11 +780,11 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -794,11 +794,11 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -828,10 +828,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %l1, %l0
   %or = or i32 %add, %result.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -844,7 +844,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_xor(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -858,11 +858,11 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -872,11 +872,11 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -886,11 +886,11 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -900,11 +900,11 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -934,10 +934,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %l1, %l0
   %xor = xor i32 %add, %result.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -950,7 +950,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
+define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_fadd(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -964,11 +964,11 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -978,11 +978,11 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -992,11 +992,11 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load float, float* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -1006,11 +1006,11 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load float, float* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load float, float* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -1040,10 +1040,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %l0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %l1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %l0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %l1 = load float, ptr %arrayidx2, align 4
   %add = fadd fast float %result.08, %l0
   %fadd = fadd fast float %add, %l1
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -1056,7 +1056,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret float %result.0.lcssa
 }
 
-define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
+define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_fmul(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -1070,11 +1070,11 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -1084,11 +1084,11 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -1098,11 +1098,11 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load float, float* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -1112,11 +1112,11 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load float, float* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load float, float* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -1148,10 +1148,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %l0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %l1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %l0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %l1 = load float, ptr %arrayidx2, align 4
   %add = fmul fast float %result.08, %l0
   %fmul = fmul fast float %add, %l1
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -1164,7 +1164,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret float %result.0.lcssa
 }
 
-define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_min(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -1178,8 +1178,8 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -1188,8 +1188,8 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -1198,8 +1198,8 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -1208,8 +1208,8 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP19:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -1237,8 +1237,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
   %c0 = icmp slt i32 %result.08, %l0
   %v0 = select i1 %c0, i32 %result.08, i32 %l0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -1251,7 +1251,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_max(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -1265,8 +1265,8 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -1275,8 +1275,8 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -1285,8 +1285,8 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -1295,8 +1295,8 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP19:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -1324,8 +1324,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
   %c0 = icmp ugt i32 %result.08, %l0
   %v0 = select i1 %c0, i32 %result.08, i32 %l0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -1339,7 +1339,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 ; Conditional reductions with multi-input phis.
-define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
+define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 ; CHECK-LABEL: @reduction_conditional(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -1349,12 +1349,10 @@ define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ; CHECK-NEXT:    [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
@@ -1397,10 +1395,10 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %l0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %l1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %l0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %l1 = load float, ptr %arrayidx2, align 4
   %cmp3 = fcmp ogt float %l0, %l1
   br i1 %cmp3, label %if.then, label %for.inc
 
@@ -1432,7 +1430,7 @@ for.end:
   ret float %sum.1.lcssa
 }
 
-define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
+define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_add_trunc(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -1448,8 +1446,8 @@ define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
 ; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP5]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -1459,8 +1457,8 @@ define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP9:%.*]] = or i32 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i8, i8* [[TMP11]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i8> [[TMP7]], i8 [[TMP12]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -1470,8 +1468,8 @@ define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP16:%.*]] = or i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 4
 ; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP14]], i8 [[TMP19]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -1481,8 +1479,8 @@ define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP23:%.*]] = or i32 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP24]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i8, i8* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP24]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i8, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP26]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -1513,8 +1511,8 @@ entry:
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
   %sum.02 = and i32 %sum.02p, 255
-  %l2 = getelementptr inbounds i8, i8* %A, i32 %indvars.iv
-  %l3 = load i8, i8* %l2, align 4
+  %l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
+  %l3 = load i8, ptr %l2, align 4
   %l3e = zext i8 %l3 to i32
   %l9 = add i32 %sum.02, %l3e
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -1528,7 +1526,7 @@ entry:
 }
 
 
-define i8 @reduction_and_trunc(i8* noalias nocapture %A) {
+define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_and_trunc(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -1543,8 +1541,8 @@ define i8 @reduction_and_trunc(i8* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i8, i8* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i8> poison, i8 [[TMP4]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -1554,8 +1552,8 @@ define i8 @reduction_and_trunc(i8* noalias nocapture %A) {
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i32 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i8> [[TMP6]], i8 [[TMP11]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -1565,8 +1563,8 @@ define i8 @reduction_and_trunc(i8* noalias nocapture %A) {
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP15:%.*]] = or i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP16]]
-; CHECK-NEXT:    [[TMP18:%.*]] = load i8, i8* [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load i8, ptr [[TMP17]], align 4
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[TMP18]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -1576,8 +1574,8 @@ define i8 @reduction_and_trunc(i8* noalias nocapture %A) {
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP22:%.*]] = or i32 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP23]]
-; CHECK-NEXT:    [[TMP25:%.*]] = load i8, i8* [[TMP24]], align 4
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP23]]
+; CHECK-NEXT:    [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 4
 ; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP25]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -1608,8 +1606,8 @@ entry:
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
   %sum.02 = and i32 %sum.02p, 255
-  %l2 = getelementptr inbounds i8, i8* %A, i32 %indvars.iv
-  %l3 = load i8, i8* %l2, align 4
+  %l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
+  %l3 = load i8, ptr %l2, align 4
   %l3e = zext i8 %l3 to i32
   %l9 = and i32 %sum.02, %l3e
   %indvars.iv.next = add i32 %indvars.iv, 1

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
index f76dfd483ec40..db7a765547307 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
@@ -3,7 +3,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-define i32 @reduction_sum_single(i32* noalias nocapture %A) {
+define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_sum_single(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -15,18 +15,14 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 8
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 12
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8
+; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12
+; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP9]] = add i32 [[TMP8]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD4]])
@@ -57,8 +53,8 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
   %l7 = add i32 %sum.02, %l3
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -70,7 +66,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @predicated(i32* noalias nocapture %A) {
+define i32 @predicated(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @predicated(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -93,8 +89,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -103,8 +99,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; CHECK:       pred.load.if7:
 ; CHECK-NEXT:    [[TMP10:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP12]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
 ; CHECK:       pred.load.continue8:
@@ -113,8 +109,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; CHECK:       pred.load.if9:
 ; CHECK-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
-; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP18]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
 ; CHECK:       pred.load.continue10:
@@ -123,8 +119,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP21]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; CHECK:       pred.load.if11:
 ; CHECK-NEXT:    [[TMP22:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP22]]
-; CHECK-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
 ; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP24]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
 ; CHECK:       pred.load.continue12:
@@ -133,8 +129,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; CHECK:       pred.load.if13:
 ; CHECK-NEXT:    [[TMP28:%.*]] = or i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP28]]
-; CHECK-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP28]]
+; CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
 ; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP30]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; CHECK:       pred.load.continue14:
@@ -143,8 +139,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP33]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; CHECK:       pred.load.if15:
 ; CHECK-NEXT:    [[TMP34:%.*]] = or i64 [[INDEX]], 5
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP34]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP34]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP36]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
 ; CHECK:       pred.load.continue16:
@@ -153,8 +149,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
 ; CHECK:       pred.load.if17:
 ; CHECK-NEXT:    [[TMP40:%.*]] = or i64 [[INDEX]], 6
-; CHECK-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP40]]
-; CHECK-NEXT:    [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4
+; CHECK-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP40]]
+; CHECK-NEXT:    [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4
 ; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP42]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
 ; CHECK:       pred.load.continue18:
@@ -163,8 +159,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP45]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
 ; CHECK:       pred.load.if19:
 ; CHECK-NEXT:    [[TMP46:%.*]] = or i64 [[INDEX]], 7
-; CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP46]]
-; CHECK-NEXT:    [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4
+; CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP46]]
+; CHECK-NEXT:    [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4
 ; CHECK-NEXT:    [[TMP49:%.*]] = insertelement <4 x i32> [[TMP44]], i32 [[TMP48]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
 ; CHECK:       pred.load.continue20:
@@ -173,8 +169,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
 ; CHECK:       pred.load.if21:
 ; CHECK-NEXT:    [[TMP52:%.*]] = or i64 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP52]]
-; CHECK-NEXT:    [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4
+; CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP52]]
+; CHECK-NEXT:    [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4
 ; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i32> poison, i32 [[TMP54]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
 ; CHECK:       pred.load.continue22:
@@ -183,8 +179,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
 ; CHECK:       pred.load.if23:
 ; CHECK-NEXT:    [[TMP58:%.*]] = or i64 [[INDEX]], 9
-; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP58]]
-; CHECK-NEXT:    [[TMP60:%.*]] = load i32, i32* [[TMP59]], align 4
+; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP58]]
+; CHECK-NEXT:    [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4
 ; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i32> [[TMP56]], i32 [[TMP60]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
 ; CHECK:       pred.load.continue24:
@@ -193,8 +189,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP63]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
 ; CHECK:       pred.load.if25:
 ; CHECK-NEXT:    [[TMP64:%.*]] = or i64 [[INDEX]], 10
-; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP64]]
-; CHECK-NEXT:    [[TMP66:%.*]] = load i32, i32* [[TMP65]], align 4
+; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP64]]
+; CHECK-NEXT:    [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4
 ; CHECK-NEXT:    [[TMP67:%.*]] = insertelement <4 x i32> [[TMP62]], i32 [[TMP66]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
 ; CHECK:       pred.load.continue26:
@@ -203,8 +199,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
 ; CHECK:       pred.load.if27:
 ; CHECK-NEXT:    [[TMP70:%.*]] = or i64 [[INDEX]], 11
-; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP70]]
-; CHECK-NEXT:    [[TMP72:%.*]] = load i32, i32* [[TMP71]], align 4
+; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP70]]
+; CHECK-NEXT:    [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4
 ; CHECK-NEXT:    [[TMP73:%.*]] = insertelement <4 x i32> [[TMP68]], i32 [[TMP72]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
 ; CHECK:       pred.load.continue28:
@@ -213,8 +209,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP75]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
 ; CHECK:       pred.load.if29:
 ; CHECK-NEXT:    [[TMP76:%.*]] = or i64 [[INDEX]], 12
-; CHECK-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP76]]
-; CHECK-NEXT:    [[TMP78:%.*]] = load i32, i32* [[TMP77]], align 4
+; CHECK-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP76]]
+; CHECK-NEXT:    [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4
 ; CHECK-NEXT:    [[TMP79:%.*]] = insertelement <4 x i32> poison, i32 [[TMP78]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
 ; CHECK:       pred.load.continue30:
@@ -223,8 +219,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP81]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
 ; CHECK:       pred.load.if31:
 ; CHECK-NEXT:    [[TMP82:%.*]] = or i64 [[INDEX]], 13
-; CHECK-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP82]]
-; CHECK-NEXT:    [[TMP84:%.*]] = load i32, i32* [[TMP83]], align 4
+; CHECK-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP82]]
+; CHECK-NEXT:    [[TMP84:%.*]] = load i32, ptr [[TMP83]], align 4
 ; CHECK-NEXT:    [[TMP85:%.*]] = insertelement <4 x i32> [[TMP80]], i32 [[TMP84]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE32]]
 ; CHECK:       pred.load.continue32:
@@ -233,8 +229,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF33:%.*]], label [[PRED_LOAD_CONTINUE34:%.*]]
 ; CHECK:       pred.load.if33:
 ; CHECK-NEXT:    [[TMP88:%.*]] = or i64 [[INDEX]], 14
-; CHECK-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP88]]
-; CHECK-NEXT:    [[TMP90:%.*]] = load i32, i32* [[TMP89]], align 4
+; CHECK-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP88]]
+; CHECK-NEXT:    [[TMP90:%.*]] = load i32, ptr [[TMP89]], align 4
 ; CHECK-NEXT:    [[TMP91:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP90]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE34]]
 ; CHECK:       pred.load.continue34:
@@ -243,8 +239,8 @@ define i32 @predicated(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP93]], label [[PRED_LOAD_IF35:%.*]], label [[PRED_LOAD_CONTINUE36]]
 ; CHECK:       pred.load.if35:
 ; CHECK-NEXT:    [[TMP94:%.*]] = or i64 [[INDEX]], 15
-; CHECK-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP94]]
-; CHECK-NEXT:    [[TMP96:%.*]] = load i32, i32* [[TMP95]], align 4
+; CHECK-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP94]]
+; CHECK-NEXT:    [[TMP96:%.*]] = load i32, ptr [[TMP95]], align 4
 ; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <4 x i32> [[TMP92]], i32 [[TMP96]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE36]]
 ; CHECK:       pred.load.continue36:
@@ -284,8 +280,8 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
   %l7 = add i32 %sum.02, %l3
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -297,7 +293,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
+define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) {
 ; CHECK-LABEL: @cond_rdx_pred(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -341,8 +337,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP12]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -351,8 +347,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP21]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; CHECK:       pred.load.if15:
 ; CHECK-NEXT:    [[TMP22:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP22]]
-; CHECK-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
 ; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP24]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
 ; CHECK:       pred.load.continue16:
@@ -361,8 +357,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
 ; CHECK:       pred.load.if17:
 ; CHECK-NEXT:    [[TMP28:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP28]]
-; CHECK-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP28]]
+; CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
 ; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP30]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
 ; CHECK:       pred.load.continue18:
@@ -371,8 +367,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP33]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
 ; CHECK:       pred.load.if19:
 ; CHECK-NEXT:    [[TMP34:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP34]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP34]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
 ; CHECK:       pred.load.continue20:
@@ -381,8 +377,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
 ; CHECK:       pred.load.if21:
 ; CHECK-NEXT:    [[TMP40:%.*]] = or i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP40]]
-; CHECK-NEXT:    [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4
+; CHECK-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP40]]
+; CHECK-NEXT:    [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4
 ; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <4 x i32> poison, i32 [[TMP42]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
 ; CHECK:       pred.load.continue22:
@@ -391,8 +387,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP45]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
 ; CHECK:       pred.load.if23:
 ; CHECK-NEXT:    [[TMP46:%.*]] = or i64 [[INDEX]], 5
-; CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP46]]
-; CHECK-NEXT:    [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4
+; CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP46]]
+; CHECK-NEXT:    [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4
 ; CHECK-NEXT:    [[TMP49:%.*]] = insertelement <4 x i32> [[TMP44]], i32 [[TMP48]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
 ; CHECK:       pred.load.continue24:
@@ -401,8 +397,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
 ; CHECK:       pred.load.if25:
 ; CHECK-NEXT:    [[TMP52:%.*]] = or i64 [[INDEX]], 6
-; CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP52]]
-; CHECK-NEXT:    [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4
+; CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP52]]
+; CHECK-NEXT:    [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4
 ; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i32> [[TMP50]], i32 [[TMP54]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
 ; CHECK:       pred.load.continue26:
@@ -411,8 +407,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
 ; CHECK:       pred.load.if27:
 ; CHECK-NEXT:    [[TMP58:%.*]] = or i64 [[INDEX]], 7
-; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP58]]
-; CHECK-NEXT:    [[TMP60:%.*]] = load i32, i32* [[TMP59]], align 4
+; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP58]]
+; CHECK-NEXT:    [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4
 ; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i32> [[TMP56]], i32 [[TMP60]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
 ; CHECK:       pred.load.continue28:
@@ -421,8 +417,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP63]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
 ; CHECK:       pred.load.if29:
 ; CHECK-NEXT:    [[TMP64:%.*]] = or i64 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP64]]
-; CHECK-NEXT:    [[TMP66:%.*]] = load i32, i32* [[TMP65]], align 4
+; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP64]]
+; CHECK-NEXT:    [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4
 ; CHECK-NEXT:    [[TMP67:%.*]] = insertelement <4 x i32> poison, i32 [[TMP66]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
 ; CHECK:       pred.load.continue30:
@@ -431,8 +427,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
 ; CHECK:       pred.load.if31:
 ; CHECK-NEXT:    [[TMP70:%.*]] = or i64 [[INDEX]], 9
-; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP70]]
-; CHECK-NEXT:    [[TMP72:%.*]] = load i32, i32* [[TMP71]], align 4
+; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP70]]
+; CHECK-NEXT:    [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4
 ; CHECK-NEXT:    [[TMP73:%.*]] = insertelement <4 x i32> [[TMP68]], i32 [[TMP72]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE32]]
 ; CHECK:       pred.load.continue32:
@@ -441,8 +437,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP75]], label [[PRED_LOAD_IF33:%.*]], label [[PRED_LOAD_CONTINUE34:%.*]]
 ; CHECK:       pred.load.if33:
 ; CHECK-NEXT:    [[TMP76:%.*]] = or i64 [[INDEX]], 10
-; CHECK-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP76]]
-; CHECK-NEXT:    [[TMP78:%.*]] = load i32, i32* [[TMP77]], align 4
+; CHECK-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP76]]
+; CHECK-NEXT:    [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4
 ; CHECK-NEXT:    [[TMP79:%.*]] = insertelement <4 x i32> [[TMP74]], i32 [[TMP78]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE34]]
 ; CHECK:       pred.load.continue34:
@@ -451,8 +447,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP81]], label [[PRED_LOAD_IF35:%.*]], label [[PRED_LOAD_CONTINUE36:%.*]]
 ; CHECK:       pred.load.if35:
 ; CHECK-NEXT:    [[TMP82:%.*]] = or i64 [[INDEX]], 11
-; CHECK-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP82]]
-; CHECK-NEXT:    [[TMP84:%.*]] = load i32, i32* [[TMP83]], align 4
+; CHECK-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP82]]
+; CHECK-NEXT:    [[TMP84:%.*]] = load i32, ptr [[TMP83]], align 4
 ; CHECK-NEXT:    [[TMP85:%.*]] = insertelement <4 x i32> [[TMP80]], i32 [[TMP84]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE36]]
 ; CHECK:       pred.load.continue36:
@@ -461,8 +457,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF37:%.*]], label [[PRED_LOAD_CONTINUE38:%.*]]
 ; CHECK:       pred.load.if37:
 ; CHECK-NEXT:    [[TMP88:%.*]] = or i64 [[INDEX]], 12
-; CHECK-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP88]]
-; CHECK-NEXT:    [[TMP90:%.*]] = load i32, i32* [[TMP89]], align 4
+; CHECK-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP88]]
+; CHECK-NEXT:    [[TMP90:%.*]] = load i32, ptr [[TMP89]], align 4
 ; CHECK-NEXT:    [[TMP91:%.*]] = insertelement <4 x i32> poison, i32 [[TMP90]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE38]]
 ; CHECK:       pred.load.continue38:
@@ -471,8 +467,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP93]], label [[PRED_LOAD_IF39:%.*]], label [[PRED_LOAD_CONTINUE40:%.*]]
 ; CHECK:       pred.load.if39:
 ; CHECK-NEXT:    [[TMP94:%.*]] = or i64 [[INDEX]], 13
-; CHECK-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP94]]
-; CHECK-NEXT:    [[TMP96:%.*]] = load i32, i32* [[TMP95]], align 4
+; CHECK-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP94]]
+; CHECK-NEXT:    [[TMP96:%.*]] = load i32, ptr [[TMP95]], align 4
 ; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <4 x i32> [[TMP92]], i32 [[TMP96]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE40]]
 ; CHECK:       pred.load.continue40:
@@ -481,8 +477,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP99]], label [[PRED_LOAD_IF41:%.*]], label [[PRED_LOAD_CONTINUE42:%.*]]
 ; CHECK:       pred.load.if41:
 ; CHECK-NEXT:    [[TMP100:%.*]] = or i64 [[INDEX]], 14
-; CHECK-NEXT:    [[TMP101:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP100]]
-; CHECK-NEXT:    [[TMP102:%.*]] = load i32, i32* [[TMP101]], align 4
+; CHECK-NEXT:    [[TMP101:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP100]]
+; CHECK-NEXT:    [[TMP102:%.*]] = load i32, ptr [[TMP101]], align 4
 ; CHECK-NEXT:    [[TMP103:%.*]] = insertelement <4 x i32> [[TMP98]], i32 [[TMP102]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE42]]
 ; CHECK:       pred.load.continue42:
@@ -491,8 +487,8 @@ define i32 @cond_rdx_pred(i32 %cond, i32* noalias %a, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP105]], label [[PRED_LOAD_IF43:%.*]], label [[PRED_LOAD_CONTINUE44]]
 ; CHECK:       pred.load.if43:
 ; CHECK-NEXT:    [[TMP106:%.*]] = or i64 [[INDEX]], 15
-; CHECK-NEXT:    [[TMP107:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP106]]
-; CHECK-NEXT:    [[TMP108:%.*]] = load i32, i32* [[TMP107]], align 4
+; CHECK-NEXT:    [[TMP107:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP106]]
+; CHECK-NEXT:    [[TMP108:%.*]] = load i32, ptr [[TMP107]], align 4
 ; CHECK-NEXT:    [[TMP109:%.*]] = insertelement <4 x i32> [[TMP104]], i32 [[TMP108]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE44]]
 ; CHECK:       pred.load.continue44:
@@ -540,8 +536,8 @@ for.body:
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %load = load i32, i32* %arrayidx
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %load = load i32, ptr %arrayidx
   %mul = mul nsw i32 %load, %sum
   br label %for.inc
 

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index 23e77d6123c2c..ad03fe52b1096 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -3,7 +3,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-define i32 @reduction_sum_single(i32* noalias nocapture %A) {
+define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_sum_single(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -12,9 +12,8 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP3]] = add i32 [[TMP2]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -36,8 +35,8 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
   %l7 = add i32 %sum.02, %l3
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -49,7 +48,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_sum(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -59,12 +58,10 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
@@ -91,10 +88,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l6 = trunc i64 %indvars.iv to i32
   %l7 = add i32 %sum.02, %l6
   %l8 = add i32 %l7, %l3
@@ -109,7 +106,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_sum_const(i32* noalias nocapture %A) {
+define i32 @reduction_sum_const(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_sum_const(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -118,9 +115,8 @@ define i32 @reduction_sum_const(i32* noalias nocapture %A) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP4]] = add i32 [[TMP3]], 12
@@ -143,8 +139,8 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
   %l7 = add i32 %sum.02, %l3
   %l9 = add i32 %l7, 3
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -157,7 +153,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_prod(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -167,12 +163,10 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
@@ -199,10 +193,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l6 = trunc i64 %indvars.iv to i32
   %l7 = mul i32 %prod.02, %l6
   %l8 = mul i32 %l7, %l3
@@ -217,7 +211,7 @@ entry:
   ret i32 %prod.0.lcssa
 }
 
-define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_mix(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -227,12 +221,10 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], [[VEC_PHI]]
@@ -258,10 +250,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l6 = mul nsw i32 %l5, %l3
   %l7 = trunc i64 %indvars.iv to i32
   %l8 = add i32 %sum.02, %l7
@@ -276,7 +268,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_mul(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -285,12 +277,10 @@ define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]])
@@ -314,10 +304,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 19, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l6 = mul i32 %sum.02, %l3
   %l7 = mul i32 %l6, %l5
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -330,7 +320,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out) {
+define i32 @start_at_non_zero(ptr nocapture %in, ptr nocapture %coeff, ptr nocapture %out) {
 ; CHECK-LABEL: @start_at_non_zero(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -339,12 +329,10 @@ define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* no
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 120, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[COEFF:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COEFF:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT:    [[TMP6]] = add i32 [[TMP5]], [[VEC_PHI]]
@@ -367,10 +355,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %coeff, i64 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %in, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %coeff, i64 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %mul = mul nsw i32 %l1, %l0
   %add = add nsw i32 %mul, %sum.09
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -383,7 +371,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_and(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -392,12 +380,10 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[WIDE_LOAD1]])
@@ -421,10 +407,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = and i32 %result.08, %l0
   %and = and i32 %add, %l1
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -437,7 +423,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_or(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -446,12 +432,10 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT:    [[TMP6]] = or i32 [[TMP5]], [[VEC_PHI]]
@@ -474,10 +458,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %l1, %l0
   %or = or i32 %add, %result.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -490,7 +474,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_xor(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -499,12 +483,10 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT:    [[TMP6]] = xor i32 [[TMP5]], [[VEC_PHI]]
@@ -527,10 +509,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %l1, %l0
   %xor = xor i32 %add, %result.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -543,7 +525,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
+define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_fadd(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -552,12 +534,10 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP5]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP4]], <4 x float> [[WIDE_LOAD1]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -579,10 +559,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %l0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %l1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %l0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %l1 = load float, ptr %arrayidx2, align 4
   %add = fadd fast float %result.08, %l0
   %fadd = fadd fast float %add, %l1
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -595,7 +575,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret float %result.0.lcssa
 }
 
-define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
+define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_fmul(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -604,12 +584,10 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD1]])
@@ -633,10 +611,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %l0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %l1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %l0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %l1 = load float, ptr %arrayidx2, align 4
   %add = fmul fast float %result.08, %l0
   %fmul = fmul fast float %add, %l1
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -649,7 +627,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret float %result.0.lcssa
 }
 
-define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_min(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -658,9 +636,8 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP3]] = call i32 @llvm.smin.i32(i32 [[TMP2]], i32 [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -682,8 +659,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
   %c0 = icmp slt i32 %result.08, %l0
   %v0 = select i1 %c0, i32 %result.08, i32 %l0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -696,7 +673,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_max(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -705,9 +682,8 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP3]] = call i32 @llvm.umax.i32(i32 [[TMP2]], i32 [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -729,8 +705,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
   %c0 = icmp ugt i32 %result.08, %l0
   %v0 = select i1 %c0, i32 %result.08, i32 %l0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -744,7 +720,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 ; Sub we can create a reduction, but not inloop
-define i32 @reduction_sub_lhs(i32* noalias nocapture %A) {
+define i32 @reduction_sub_lhs(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_sub_lhs(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -753,9 +729,8 @@ define i32 @reduction_sub_lhs(i32* noalias nocapture %A) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -777,8 +752,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
   %sub = sub nsw i32 %x.05, %l0
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -791,7 +766,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 ; Conditional reductions with multi-input phis.
-define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
+define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 ; CHECK-LABEL: @reduction_conditional(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -801,12 +776,10 @@ define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ; CHECK-NEXT:    [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
@@ -849,10 +822,10 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %l0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %l1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %l0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %l1 = load float, ptr %arrayidx2, align 4
   %cmp3 = fcmp ogt float %l0, %l1
   br i1 %cmp3, label %if.then, label %for.inc
 
@@ -884,15 +857,15 @@ for.end:
   ret float %sum.1.lcssa
 }
 
-define i32 @reduction_sum_multiuse(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_sum_multiuse(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
 ; CHECK:       .lr.ph:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], [[DOTLR_PH]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[L2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[L3:%.*]] = load i32, i32* [[L2]], align 4
+; CHECK-NEXT:    [[L2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[L3:%.*]] = load i32, ptr [[L2]], align 4
 ; CHECK-NEXT:    [[L6:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; CHECK-NEXT:    [[L7:%.*]] = add i32 [[SUM_02]], [[L6]]
 ; CHECK-NEXT:    [[L8:%.*]] = add i32 [[L7]], [[L3]]
@@ -910,10 +883,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l10, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l6 = trunc i64 %indvars.iv to i32
   %l7 = add i32 %sum.02, %l6
   %l8 = add i32 %l7, %l3
@@ -930,7 +903,7 @@ end:
 }
 
 ; Predicated loop, cannot (yet) use in-loop reductions.
-define i32 @reduction_predicated(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_predicated(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -940,12 +913,10 @@ define i32 @reduction_predicated(i32* noalias nocapture %A, i32* noalias nocaptu
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
@@ -972,10 +943,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l6 = trunc i64 %indvars.iv to i32
   %l7 = add i32 %sum.02, %l6
   %l8 = add i32 %l7, %l3
@@ -990,7 +961,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
+define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_add_trunc(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -1000,9 +971,8 @@ define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i8> [ <i8 -1, i8 0, i8 0, i8 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP3]] = add <4 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
@@ -1025,8 +995,8 @@ entry:
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
   %sum.02 = and i32 %sum.02p, 255
-  %l2 = getelementptr inbounds i8, i8* %A, i32 %indvars.iv
-  %l3 = load i8, i8* %l2, align 4
+  %l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
+  %l3 = load i8, ptr %l2, align 4
   %l3e = zext i8 %l3 to i32
   %l9 = add i32 %sum.02, %l3e
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -1040,7 +1010,7 @@ entry:
 }
 
 
-define i8 @reduction_and_trunc(i8* noalias nocapture %A) {
+define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_and_trunc(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -1050,9 +1020,8 @@ define i8 @reduction_and_trunc(i8* noalias nocapture %A) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP3]] = and <4 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
@@ -1075,8 +1044,8 @@ entry:
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
   %sum.02 = and i32 %sum.02p, 255
-  %l2 = getelementptr inbounds i8, i8* %A, i32 %indvars.iv
-  %l3 = load i8, i8* %l2, align 4
+  %l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
+  %l3 = load i8, ptr %l2, align 4
   %l3e = zext i8 %l3 to i32
   %l9 = and i32 %sum.02, %l3e
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -1090,7 +1059,7 @@ entry:
 }
 
 ; Test case when loop has a call to the llvm.fmuladd intrinsic.
-define float @reduction_fmuladd(float* %a, float* %b, i64 %n) {
+define float @reduction_fmuladd(ptr %a, ptr %b, i64 %n) {
 ; CHECK-LABEL: @reduction_fmuladd(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
@@ -1101,12 +1070,10 @@ define float @reduction_fmuladd(float* %a, float* %b, i64 %n) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP4]])
 ; CHECK-NEXT:    [[TMP6]] = fadd float [[TMP5]], [[VEC_PHI]]
@@ -1123,10 +1090,10 @@ define float @reduction_fmuladd(float* %a, float* %b, i64 %n) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP8]], float [[TMP9]], float [[SUM_07]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
@@ -1142,10 +1109,10 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %b, i64 %iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv
+  %1 = load float, ptr %arrayidx2, align 4
   %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07)
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n

diff --git a/llvm/test/Transforms/LoopVectorize/reduction-order.ll b/llvm/test/Transforms/LoopVectorize/reduction-order.ll
index 08e5781d3626e..7d8613b794f48 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-order.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-order.ll
@@ -15,7 +15,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 ; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> [[ADD_3]], <4 x i32>
 ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
 ;
-define internal i64 @foo(i32* %t0) !prof !1 {
+define internal i64 @foo(ptr %t0) !prof !1 {
 t16:
   br label %t20
 

diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
index 83d71fa5d0719..536142f3c5240 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
@@ -3,7 +3,7 @@
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
-define i32 @reduction_sum_single(i32* noalias nocapture %A) {
+define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_sum_single(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -17,8 +17,8 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -27,8 +27,8 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -37,8 +37,8 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP13:%.*]] = or i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -47,8 +47,8 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP19:%.*]] = or i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP19]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -76,8 +76,8 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
   %l7 = add i32 %sum.02, %l3
   %indvars.iv.next = add i32 %indvars.iv, 1
   %exitcond = icmp eq i32 %indvars.iv.next, 257
@@ -88,7 +88,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_sum(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -102,11 +102,11 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -116,11 +116,11 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -130,11 +130,11 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -144,11 +144,11 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -179,10 +179,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l7 = add i32 %sum.02, %indvars.iv
   %l8 = add i32 %l7, %l3
   %l9 = add i32 %l8, %l5
@@ -195,7 +195,7 @@ entry:
   ret i32 %sum.0.lcssa
 }
 
-define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @reduction_prod(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -209,11 +209,11 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -223,11 +223,11 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -237,11 +237,11 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -251,11 +251,11 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -285,10 +285,10 @@ entry:
 .lr.ph:                                           ; preds = %entry, %.lr.ph
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
   %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
-  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l3 = load i32, i32* %l2, align 4
-  %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
-  %l5 = load i32, i32* %l4, align 4
+  %l2 = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l4 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
+  %l5 = load i32, ptr %l4, align 4
   %l8 = mul i32 %prod.02, %l3
   %l9 = mul i32 %l8, %l5
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -300,7 +300,7 @@ entry:
   ret i32 %prod.0.lcssa
 }
 
-define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_and(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -314,11 +314,11 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -328,11 +328,11 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -342,11 +342,11 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -356,11 +356,11 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -390,10 +390,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = and i32 %result.08, %l0
   %and = and i32 %add, %l1
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -405,7 +405,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_or(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -419,11 +419,11 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -433,11 +433,11 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -447,11 +447,11 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -461,11 +461,11 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -495,10 +495,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %l1, %l0
   %or = or i32 %add, %result.08
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -510,7 +510,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_xor(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -524,11 +524,11 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -538,11 +538,11 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -552,11 +552,11 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -566,11 +566,11 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -600,10 +600,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
-  %l1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
+  %l1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %l1, %l0
   %xor = xor i32 %add, %result.08
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -615,7 +615,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
+define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_fadd(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -629,11 +629,11 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -643,11 +643,11 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -657,11 +657,11 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load float, float* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -671,11 +671,11 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load float, float* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load float, float* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -705,10 +705,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv
-  %l0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv
-  %l1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i32 %indvars.iv
+  %l0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i32 %indvars.iv
+  %l1 = load float, ptr %arrayidx2, align 4
   %add = fadd fast float %result.08, %l0
   %fadd = fadd fast float %add, %l1
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -720,7 +720,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret float %result.0.lcssa
 }
 
-define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
+define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_fmul(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -734,11 +734,11 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -748,11 +748,11 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -762,11 +762,11 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP21:%.*]] = or i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[TMP21]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load float, float* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP21]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -776,11 +776,11 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load float, float* [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[TMP31]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load float, float* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -810,10 +810,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv
-  %l0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv
-  %l1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i32 %indvars.iv
+  %l0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i32 %indvars.iv
+  %l1 = load float, ptr %arrayidx2, align 4
   %add = fmul fast float %result.08, %l0
   %fmul = fmul fast float %add, %l1
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -825,7 +825,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret float %result.0.lcssa
 }
 
-define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_min(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -839,8 +839,8 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -849,8 +849,8 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -859,8 +859,8 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP13:%.*]] = or i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -869,8 +869,8 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP19:%.*]] = or i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP19]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -898,8 +898,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
   %c0 = icmp slt i32 %result.08, %l0
   %v0 = select i1 %c0, i32 %result.08, i32 %l0
   %indvars.iv.next = add i32 %indvars.iv, 1
@@ -911,7 +911,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
+define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @reduction_max(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -925,8 +925,8 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -935,8 +935,8 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -945,8 +945,8 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; CHECK:       pred.load.if3:
 ; CHECK-NEXT:    [[TMP13:%.*]] = or i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
 ; CHECK:       pred.load.continue4:
@@ -955,8 +955,8 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.if5:
 ; CHECK-NEXT:    [[TMP19:%.*]] = or i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP19]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
@@ -984,8 +984,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %l0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
+  %l0 = load i32, ptr %arrayidx, align 4
   %c0 = icmp ugt i32 %result.08, %l0
   %v0 = select i1 %c0, i32 %result.08, i32 %l0
   %indvars.iv.next = add i32 %indvars.iv, 1

diff --git a/llvm/test/Transforms/LoopVectorize/reduction-ptr.ll b/llvm/test/Transforms/LoopVectorize/reduction-ptr.ll
index 85b42c91a8f2b..0656cd2b2aa94 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-ptr.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-ptr.ll
@@ -5,21 +5,21 @@ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 
 ; Reductions of pointer types are not supported.
 
-define void @PR49215(i32* %p, i32* %q) {
+define void @PR49215(ptr %p, ptr %q) {
 ; CHECK-LABEL: @PR49215(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[G:%.*]] = phi i32* [ [[P:%.*]], [[ENTRY]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32* [[Q:%.*]], [[G]]
-; CHECK-NEXT:    [[UMIN]] = select i1 [[CMP2]], i32* [[Q]], i32* [[G]]
+; CHECK-NEXT:    [[G:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult ptr [[Q:%.*]], [[G]]
+; CHECK-NEXT:    [[UMIN]] = select i1 [[CMP2]], ptr [[Q]], ptr [[G]]
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], undef
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOPEXIT:%.*]], label [[FOR_BODY]]
 ; CHECK:       loopexit:
-; CHECK-NEXT:    [[UMIN_LCSSA:%.*]] = phi i32* [ [[UMIN]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[PHI_CAST:%.*]] = ptrtoint i32* [[UMIN_LCSSA]] to i64
+; CHECK-NEXT:    [[UMIN_LCSSA:%.*]] = phi ptr [ [[UMIN]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[PHI_CAST:%.*]] = ptrtoint ptr [[UMIN_LCSSA]] to i64
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -27,14 +27,14 @@ entry:
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %g = phi i32* [ %p, %entry ], [ %umin, %for.body ]
-  %cmp2 = icmp ult i32* %q, %g
-  %umin = select i1 %cmp2, i32* %q, i32* %g
+  %g = phi ptr [ %p, %entry ], [ %umin, %for.body ]
+  %cmp2 = icmp ult ptr %q, %g
+  %umin = select i1 %cmp2, ptr %q, ptr %g
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, undef
   br i1 %exitcond, label %loopexit, label %for.body
 
 loopexit:
-  %phi.cast = ptrtoint i32* %umin to i64
+  %phi.cast = ptrtoint ptr %umin to i64
   ret void
 }

diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index b8f00f10fae2c..278917b0069c8 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -8,17 +8,17 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK: add <4 x i32>
 ;CHECK: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32>
 ;CHECK: ret i32
-define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+define i32 @reduction_sum(i32 %n, ptr noalias nocapture %A, ptr noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = trunc i64 %indvars.iv to i32
   %7 = add i32 %sum.02, %6
   %8 = add i32 %7, %3
@@ -39,17 +39,17 @@ define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 ;CHECK: mul <4 x i32>
 ;CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>
 ;CHECK: ret i32
-define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+define i32 @reduction_prod(i32 %n, ptr noalias nocapture %A, ptr noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = trunc i64 %indvars.iv to i32
   %7 = mul i32 %prod.02, %6
   %8 = mul i32 %7, %3
@@ -70,17 +70,17 @@ define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocap
 ;CHECK: mul nsw <4 x i32>
 ;CHECK: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32>
 ;CHECK: ret i32
-define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+define i32 @reduction_mix(i32 %n, ptr noalias nocapture %A, ptr noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = mul nsw i32 %5, %3
   %7 = trunc i64 %indvars.iv to i32
   %8 = add i32 %sum.02, %7
@@ -99,17 +99,17 @@ define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 ;CHECK: mul <4 x i32>
 ;CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>
 ;CHECK: ret i32
-define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+define i32 @reduction_mul(i32 %n, ptr noalias nocapture %A, ptr noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 19, %0 ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = trunc i64 %indvars.iv to i32
   %7 = add i32 %3, %6
   %8 = add i32 %7, %5
@@ -129,7 +129,7 @@ define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 ;CHECK: <i32 120, i32 0, i32 0, i32 0>
 ;CHECK: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32>
 ;CHECK: ret i32
-define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp {
+define i32 @start_at_non_zero(ptr nocapture %in, ptr nocapture %coeff, ptr nocapture %out, i32 %n) nounwind uwtable readonly ssp {
 entry:
   %cmp7 = icmp sgt i32 %n, 0
   br i1 %cmp7, label %for.body, label %for.end
@@ -137,10 +137,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %coeff, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %in, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %coeff, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %mul = mul nsw i32 %1, %0
   %add = add nsw i32 %mul, %sum.09
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -158,7 +158,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: and <4 x i32>
 ;CHECK: call i32 @llvm.vector.reduce.and.v4i32(<4 x i32>
 ;CHECK: ret i32
-define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+define i32 @reduction_and(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable readonly {
 entry:
   %cmp7 = icmp sgt i32 %n, 0
   br i1 %cmp7, label %for.body, label %for.end
@@ -166,10 +166,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %and = and i32 %add, %result.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -186,7 +186,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: or <4 x i32>
 ;CHECK: call i32 @llvm.vector.reduce.or.v4i32(<4 x i32>
 ;CHECK: ret i32
-define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+define i32 @reduction_or(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable readonly {
 entry:
   %cmp7 = icmp sgt i32 %n, 0
   br i1 %cmp7, label %for.body, label %for.end
@@ -194,10 +194,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %or = or i32 %add, %result.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -214,7 +214,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: xor <4 x i32>
 ;CHECK: call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>
 ;CHECK: ret i32
-define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+define i32 @reduction_xor(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable readonly {
 entry:
   %cmp7 = icmp sgt i32 %n, 0
   br i1 %cmp7, label %for.body, label %for.end
@@ -222,10 +222,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %xor = xor i32 %add, %result.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -243,7 +243,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK-NOT: phi <4 x i32>
 ;CHECK-NOT: sub nsw <4 x i32>
 ;CHECK: ret i32
-define i32 @reduction_sub_rhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {
+define i32 @reduction_sub_rhs(i32 %n, ptr noalias nocapture %A) nounwind uwtable readonly {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
@@ -251,8 +251,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %sub = sub nsw i32 %0, %x.05
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -270,7 +270,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: phi <4 x i32>
 ;CHECK: sub <4 x i32>
 ;CHECK: ret i32
-define i32 @reduction_sub_lhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {
+define i32 @reduction_sub_lhs(i32 %n, ptr noalias nocapture %A) nounwind uwtable readonly {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
@@ -278,8 +278,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %sub = sub nsw i32 %x.05, %0
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -295,17 +295,17 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK: reduction_conditional
 ; CHECK: fadd fast <4 x float>
 
-define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
+define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %cmp3 = fcmp ogt float %0, %1
   br i1 %cmp3, label %if.then, label %for.inc
 
@@ -340,17 +340,17 @@ for.end:
 ; We can't vectorize reductions with phi inputs from outside the reduction.
 ; CHECK: noreduction_phi
 ; CHECK-NOT: fadd <4 x float>
-define float @noreduction_phi(float* %A, float* %B, float* %C, float %S) {
+define float @noreduction_phi(ptr %A, ptr %B, ptr %C, float %S) {
 entry:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx2, align 4
   %cmp3 = fcmp ogt float %0, %1
   br i1 %cmp3, label %if.then, label %for.inc
 
@@ -386,7 +386,7 @@ for.end:
 ; CHECK: noredux_header_phi
 ; CHECK-NOT: fadd <4 x float>
 
-define float @noredux_header_phi(float* %A, float* %B, float* %C, float %S)  {
+define float @noredux_header_phi(ptr %A, ptr %B, ptr %C, float %S)  {
 entry:
   br label %for.body
 
@@ -394,8 +394,8 @@ for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %sum2.09 = phi float [ 0.000000e+00, %entry ], [ %add1, %for.body ]
   %sum.08 = phi float [ %S, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %B, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %add = fadd fast float %sum.08, %0
   %add1 = fadd fast float %sum2.09, %add
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -469,7 +469,7 @@ exit:
 ;CHECK: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32>
 ;CHECK: %sum.copy = phi i32 [ %[[SCALAR:.*]], %.lr.ph ], [ %[[VECTOR:.*]], %middle.block ]
 ;CHECK: ret i32
-define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) {
+define i32 @reduction_sum_multiuse(i32 %n, ptr noalias nocapture %A, ptr noalias nocapture %B) {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph.preheader, label %end
 .lr.ph.preheader:                                 ; preds = %0
@@ -478,10 +478,10 @@ define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noali
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %.lr.ph.preheader ]
-  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = trunc i64 %indvars.iv to i32
   %7 = add i32 %sum.02, %6
   %8 = add i32 %7, %3
@@ -507,7 +507,7 @@ end:
 ; variable. We cannot vectorize this.
 ; CHECK-LABEL: reduction_reset(
 ; CHECK-NOT: <4 x i32>
-define void @reduction_reset(i32 %N, i32* nocapture readonly %arrayA, i32* nocapture %arrayB) {
+define void @reduction_reset(i32 %N, ptr nocapture readonly %arrayA, ptr nocapture %arrayB) {
 entry:
   %c4 = icmp sgt i32 %N, 0
   br i1 %c4, label %.lr.ph.preheader, label %._crit_edge
@@ -520,8 +520,8 @@ entry:
 .lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
   %indvars.iv = phi i64 [ 0, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ]
   %.017 = phi i32 [ 100, %.lr.ph.preheader ], [ %csel, %.lr.ph ]
-  %c6 = getelementptr inbounds i32, i32* %arrayA, i64 %indvars.iv
-  %c7 = load i32, i32* %c6, align 4
+  %c6 = getelementptr inbounds i32, ptr %arrayA, i64 %indvars.iv
+  %c7 = load i32, ptr %c6, align 4
   %c8 = icmp sgt i32 %c7, 0
   %c9 = add nsw i32 %c7, %.017
   %csel = select i1 %c8, i32 %c9, i32 0
@@ -537,13 +537,13 @@ entry:
 ._crit_edge:                                      ; preds = %._crit_edge.loopexit, %entry
   %.015.lcssa = phi i64 [ -1, %entry ], [ %phitmp19, %._crit_edge.loopexit ]
   %.0.lcssa = phi i32 [ 100, %entry ], [ %csel.lcssa, %._crit_edge.loopexit ]
-  %c10 = getelementptr inbounds i32, i32* %arrayB, i64 %.015.lcssa
-  store i32 %.0.lcssa, i32* %c10, align 4
+  %c10 = getelementptr inbounds i32, ptr %arrayB, i64 %.015.lcssa
+  store i32 %.0.lcssa, ptr %c10, align 4
   ret void
 }
 
 ; Can vectorize reduction with redundant single-operand phi input.
-define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, i64* %A) {
+define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) {
 ; CHECK-LABEL: @reduction_with_phi_with_one_incoming_on_backedge
 ; CHECK: add <4 x i64>
 ;
@@ -553,8 +553,8 @@ entry:
 loop.header:
   %iv = phi i16 [ 1, %entry ], [ %iv.next, %loop.latch ]
   %sum = phi i64 [ 0, %entry ], [ %phi.sum.next, %loop.latch ]
-  %gep.A = getelementptr i64, i64* %A, i16 %iv
-  %lv.A = load i64, i64* %gep.A
+  %gep.A = getelementptr i64, ptr %A, i16 %iv
+  %lv.A = load i64, ptr %gep.A
   %sum.next = add nsw i64 %sum, %lv.A
   br label %loop.bb
 
@@ -573,7 +573,7 @@ exit:
 }
 
 ; Can vectorize reduction with redundant two-operand phi input.
-define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, i64* %A) {
+define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) {
 ; CHECK-LABEL: @reduction_with_phi_with_two_incoming_on_backedge
 ; CHECK: add <4 x i64>
 ;
@@ -583,8 +583,8 @@ entry:
 loop.header:
   %iv = phi i16 [ 1, %entry ], [ %iv.next, %loop.latch ]
   %sum = phi i64 [ 0, %entry ], [ %phi.sum.next, %loop.latch ]
-  %gep.A = getelementptr i64, i64* %A, i16 %iv
-  %lv.A = load i64, i64* %gep.A
+  %gep.A = getelementptr i64, ptr %A, i16 %iv
+  %lv.A = load i64, ptr %gep.A
   %sum.next = add nsw i64 %sum, %lv.A
   %cmp.0 = icmp eq i64 %lv.A, 29
   br i1 %cmp.0, label %loop.bb, label %loop.latch

diff --git a/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll b/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll
index 36d6882a1641f..2a5240e73c6f8 100644
--- a/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll
@@ -2,7 +2,7 @@
 
 ; Make sure LV does not crash when generating remarks for loops with non-unique
 ; exit blocks.
-define i32 @test_non_unique_exit_blocks(i32* nocapture readonly align 4 dereferenceable(1024) %data, i32 %x) {
+define i32 @test_non_unique_exit_blocks(ptr nocapture readonly align 4 dereferenceable(1024) %data, i32 %x) {
 ; CHECK: loop not vectorized: could not determine number of loop iterations
 ;
 entry:
@@ -15,8 +15,8 @@ for.header:                                         ; preds = %for.cond.lr.ph, %
   br i1 %exitcond.not, label %header.exit, label %for.latch
 
 for.latch:
-  %arrayidx = getelementptr inbounds i32, i32* %data, i64 %iv
-  %lv = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %data, i64 %iv
+  %lv = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp eq i32 %lv, %x
   br i1 %cmp1, label %latch.exit, label %for.header
 

diff --git a/llvm/test/Transforms/LoopVectorize/remove_metadata.ll b/llvm/test/Transforms/LoopVectorize/remove_metadata.ll
index e8c88a8aba73e..96de172a0ea7c 100644
--- a/llvm/test/Transforms/LoopVectorize/remove_metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/remove_metadata.ll
@@ -6,16 +6,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; CHECK-LABEL: @disable_nonforced_enable(
 ; CHECK: store <2 x i32>
-define void @disable_nonforced_enable(i32* nocapture %a, i32 %n) {
+define void @disable_nonforced_enable(ptr nocapture %a, i32 %n) {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
   %0 = trunc i64 %indvars.iv to i32
-  store i32 %0, i32* %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
index 8b2e812c2c896..5a95945f9db81 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -10,7 +10,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK: %[[a0:.+]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK: %[[a4:.+]] = add i64 [[OFFSET_IDX]], -4
 
-define i32 @reverse_induction_i64(i64 %startval, i32 * %ptr) {
+define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) {
 entry:
   br label %for.body
 
@@ -19,8 +19,8 @@ for.body:
   %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
   %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
   %add.i = add i64 %add.i7, -1
-  %kind_.i = getelementptr inbounds i32, i32* %ptr, i64 %add.i
-  %tmp.i1 = load i32, i32* %kind_.i, align 4
+  %kind_.i = getelementptr inbounds i32, ptr %ptr, i64 %add.i
+  %tmp.i1 = load i32, ptr %kind_.i, align 4
   %inc.redux = add i32 %tmp.i1, %redux5
   %inc4 = add i32 %i.06, 1
   %exitcond = icmp ne i32 %inc4, 1024
@@ -36,7 +36,7 @@ loopend:
 ; CHECK: %[[a0:.+]] = add i128 [[OFFSET_IDX]], 0
 ; CHECK: %[[a4:.+]] = add i128 [[OFFSET_IDX]], -4
 
-define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) {
+define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) {
 entry:
   br label %for.body
 
@@ -45,8 +45,8 @@ for.body:
   %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
   %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
   %add.i = add i128 %add.i7, -1
-  %kind_.i = getelementptr inbounds i32, i32* %ptr, i128 %add.i
-  %tmp.i1 = load i32, i32* %kind_.i, align 4
+  %kind_.i = getelementptr inbounds i32, ptr %ptr, i128 %add.i
+  %tmp.i1 = load i32, ptr %kind_.i, align 4
   %inc.redux = add i32 %tmp.i1, %redux5
   %inc4 = add i32 %i.06, 1
   %exitcond = icmp ne i32 %inc4, 1024
@@ -62,7 +62,7 @@ loopend:
 ; CHECK: %[[a0:.+]] = add i16 [[OFFSET_IDX]], 0
 ; CHECK: %[[a4:.+]] = add i16 [[OFFSET_IDX]], -4
 
-define i32 @reverse_induction_i16(i16 %startval, i32 * %ptr) {
+define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
 entry:
   br label %for.body
 
@@ -71,8 +71,8 @@ for.body:
   %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
   %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
   %add.i = add i16 %add.i7, -1
-  %kind_.i = getelementptr inbounds i32, i32* %ptr, i16 %add.i
-  %tmp.i1 = load i32, i32* %kind_.i, align 4
+  %kind_.i = getelementptr inbounds i32, ptr %ptr, i16 %add.i
+  %tmp.i1 = load i32, ptr %kind_.i, align 4
   %inc.redux = add i32 %tmp.i1, %redux5
   %inc4 = add i32 %i.06, 1
   %exitcond = icmp ne i32 %inc4, 1024
@@ -114,8 +114,8 @@ while.body:
   %forward_induction.05 = phi i8 [ 0, %entry ], [ %inc, %while.body ]
   %inc = add i8 %forward_induction.05, 1
   %conv = zext i8 %inc to i32
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %conv, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %conv, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, -1
   %0 = trunc i64 %indvars.iv to i32
   %cmp = icmp sgt i32 %0, 0
@@ -140,8 +140,8 @@ while.body:
   %forward_induction.05 = phi i8 [ -127, %entry ], [ %inc, %while.body ]
   %inc = add i8 %forward_induction.05, 1
   %conv = sext i8 %inc to i32
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %conv, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %conv, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, -1
   %0 = trunc i64 %indvars.iv to i32
   %cmp = icmp sgt i32 %0, 0

diff --git a/llvm/test/Transforms/LoopVectorize/reverse_iter.ll b/llvm/test/Transforms/LoopVectorize/reverse_iter.ll
index 45b7d092cf429..43e916b48265e 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_iter.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_iter.ll
@@ -16,7 +16,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK-LABEL: @foo(
 ;CHECK:  <i32 0, i32 -1, i32 -2, i32 -3>
 ;CHECK: ret
-define i32 @foo(i32 %n, i32* nocapture %A) {
+define i32 @foo(i32 %n, ptr nocapture %A) {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
@@ -30,8 +30,8 @@ define i32 @foo(i32 %n, i32* nocapture %A) {
   %4 = trunc i64 %indvars.iv to i32
   %5 = shl nsw i32 %4, 1
   %6 = sext i32 %5 to i64
-  %7 = getelementptr inbounds i32, i32* %A, i64 %6
-  %8 = load i32, i32* %7, align 4
+  %7 = getelementptr inbounds i32, ptr %A, i64 %6
+  %8 = load i32, ptr %7, align 4
   %9 = add nsw i32 %8, %sum.01
   %indvars.iv.next = add i64 %indvars.iv, -1
   %10 = trunc i64 %indvars.iv.next to i32

diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-address-space.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-address-space.ll
index 77fddd40eadb0..1a8bbfd73b289 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-address-space.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-address-space.ll
@@ -25,7 +25,7 @@ target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-
 
 ; Both parameters are unidentified objects with the same address
 ; space, so this should vectorize normally.
-define void @foo(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %n) #0 {
+define void @foo(ptr addrspace(1) %a, ptr addrspace(1) %b, i32 %n) #0 {
 ; CHECK-LABEL: @foo(
 ; CHECK: <4 x i32>
 ; CHECK: ret
@@ -37,12 +37,12 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
-  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %idxprom
-  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %b, i64 %idxprom
+  %0 = load i32, ptr addrspace(1) %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom1
-  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %idxprom1
+  store i32 %mul, ptr addrspace(1) %arrayidx2, align 4
   %inc = add nsw i32 %i.02, 1
   %cmp = icmp slt i32 %inc, %n
   br i1 %cmp, label %for.body, label %for.end
@@ -52,7 +52,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 ; Parameters are unidentified and different address spaces, so cannot vectorize.
-define void @bar0(i32* %a, i32 addrspace(1)* %b, i32 %n) #0 {
+define void @bar0(ptr %a, ptr addrspace(1) %b, i32 %n) #0 {
 ; CHECK-LABEL: @bar0(
 ; CHECK-NOT: <4 x i32>
 ; CHECK: ret
@@ -64,12 +64,12 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
-  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %idxprom
-  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %b, i64 %idxprom
+  %0 = load i32, ptr addrspace(1) %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
-  store i32 %mul, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1
+  store i32 %mul, ptr %arrayidx2, align 4
   %inc = add nsw i32 %i.02, 1
   %cmp = icmp slt i32 %inc, %n
   br i1 %cmp, label %for.body, label %for.end
@@ -79,7 +79,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 ; Swapped arguments should be the same
-define void @bar1(i32 addrspace(1)* %a, i32* %b, i32 %n) #0 {
+define void @bar1(ptr addrspace(1) %a, ptr %b, i32 %n) #0 {
 ; CHECK-LABEL: @bar1(
 ; CHECK-NOT: <4 x i32>
 ; CHECK: ret
@@ -91,12 +91,12 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom1
-  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %idxprom1
+  store i32 %mul, ptr addrspace(1) %arrayidx2, align 4
   %inc = add nsw i32 %i.02, 1
   %cmp = icmp slt i32 %inc, %n
   br i1 %cmp, label %for.body, label %for.end
@@ -107,7 +107,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; We should still be able to vectorize with noalias even if the
 ; address spaces are different.
-define void @bar2(i32* noalias %a, i32 addrspace(1)* noalias %b, i32 %n) #0 {
+define void @bar2(ptr noalias %a, ptr addrspace(1) noalias %b, i32 %n) #0 {
 ; CHECK-LABEL: @bar2(
 ; CHECK: <4 x i32>
 ; CHECK: ret
@@ -119,12 +119,12 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
-  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %idxprom
-  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %b, i64 %idxprom
+  %0 = load i32, ptr addrspace(1) %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
-  store i32 %mul, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1
+  store i32 %mul, ptr %arrayidx2, align 4
   %inc = add nsw i32 %i.02, 1
   %cmp = icmp slt i32 %inc, %n
   br i1 %cmp, label %for.body, label %for.end
@@ -135,7 +135,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; Store to identified global with different address space. This isn't
 ; generally safe and shouldn't be vectorized.
-define void @arst0(i32* %b, i32 %n) #0 {
+define void @arst0(ptr %b, i32 %n) #0 {
 ; CHECK-LABEL: @arst0(
 ; CHECK-NOT: <4 x i32>
 ; CHECK: ret
@@ -147,12 +147,12 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
-  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
-  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds [1024 x i32], ptr addrspace(1) @g_as1, i64 0, i64 %idxprom1
+  store i32 %mul, ptr addrspace(1) %arrayidx2, align 4
   %inc = add nsw i32 %i.02, 1
   %cmp = icmp slt i32 %inc, %n
   br i1 %cmp, label %for.body, label %for.end
@@ -164,7 +164,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; Load from identified global with different address space.
 ; This isn't generally safe and shouldn't be vectorized.
-define void @arst1(i32* %b, i32 %n) #0 {
+define void @arst1(ptr %b, i32 %n) #0 {
 ; CHECK-LABEL: @arst1(
 ; CHECK-NOT: <4 x i32>
 ; CHECK: ret
@@ -176,12 +176,12 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom
-  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr addrspace(1) @g_as1, i64 0, i64 %idxprom
+  %0 = load i32, ptr addrspace(1) %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %idxprom1
-  store i32 %mul, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %idxprom1
+  store i32 %mul, ptr %arrayidx2, align 4
   %inc = add nsw i32 %i.02, 1
   %cmp = icmp slt i32 %inc, %n
   br i1 %cmp, label %for.body, label %for.end
@@ -204,12 +204,12 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
-  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom
-  %0 = load i32, i32 addrspace(2)* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1024 x i32], ptr addrspace(2) @q_as2, i64 0, i64 %idxprom
+  %0 = load i32, ptr addrspace(2) %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
-  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
-  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds [1024 x i32], ptr addrspace(1) @g_as1, i64 0, i64 %idxprom1
+  store i32 %mul, ptr addrspace(1) %arrayidx2, align 4
   %inc = add nsw i32 %i.02, 1
   %cmp = icmp slt i32 %inc, %n
   br i1 %cmp, label %for.body, label %for.end

diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll
index 22bdc68cd184a..4407cf1b24a45 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll
@@ -8,7 +8,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 ; Test case for PR47751. Make sure the runtime check includes a required
 ; addition of the size of the element type (a pointer) for the end bound.
 
-define void @test(i64 %arg, i32 %arg1, i8** %base) {
+define void @test(i64 %arg, i32 %arg1, ptr %base) {
 ; CHECK:      LAA: Adding RT check for range:
 ; CHECK-NEXT:  Start: ((8 * (zext i32 (-1 + %arg1)<nsw> to i64))<nuw><nsw> + (8 * (1 smin %arg)) + (-8 * %arg) + %base)
 ; CHECK-SAME:  End: (8 + (8 * (zext i32 (-1 + %arg1)<nsw> to i64))<nuw><nsw> + %base)
@@ -26,12 +26,12 @@ loop:
   %iv.2 = phi i32 [ %arg1, %entry ], [ %iv.2.next, %loop ]
   %iv.2.next = add nsw i32 %iv.2, -1
   %iv.2.ext = zext i32 %iv.2.next to i64
-  %idx.1 = getelementptr inbounds i8*, i8** %base, i64 %iv.2.ext
-  %v.1 = load i8*, i8** %idx.1, align 8
-  %idx.2 = getelementptr inbounds i8*, i8** %base, i64 %iv.1
-  %v.2 = load i8*, i8** %idx.2, align 8
-  store i8* %v.2, i8** %idx.1, align 8
-  store i8* %v.1, i8** %idx.2, align 8
+  %idx.1 = getelementptr inbounds ptr, ptr %base, i64 %iv.2.ext
+  %v.1 = load ptr, ptr %idx.1, align 8
+  %idx.2 = getelementptr inbounds ptr, ptr %base, i64 %iv.1
+  %v.2 = load ptr, ptr %idx.2, align 8
+  store ptr %v.2, ptr %idx.1, align 8
+  store ptr %v.1, ptr %idx.2, align 8
   %tmp11 = icmp sgt i64 %iv.1, 1
   %iv.1.next = add nsw i64 %iv.1, -1
   br i1 %tmp11, label %loop, label %exit

diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
index 51f9d03ecd707..988e1481e1d14 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
 
 
-define void @add_ints_1_1_1(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #0 {
+define void @add_ints_1_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c) #0 {
 ; CHECK-LABEL: @add_ints_1_1_1(
 ; CHECK: <4 x i32>
 ; CHECK: ret
@@ -14,13 +14,13 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
-  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %i.01
-  %1 = load i32, i32 addrspace(1)* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %b, i64 %i.01
+  %0 = load i32, ptr addrspace(1) %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %c, i64 %i.01
+  %1 = load i32, ptr addrspace(1) %arrayidx1, align 4
   %add = add nsw i32 %0, %1
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %i.01
-  store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %i.01
+  store i32 %add, ptr addrspace(1) %arrayidx2, align 4
   %inc = add i64 %i.01, 1
   %cmp = icmp ult i64 %inc, 200
   br i1 %cmp, label %for.body, label %for.end
@@ -29,7 +29,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-define void @add_ints_as_1_0_0(i32 addrspace(1)* %a, i32* %b, i32* %c) #0 {
+define void @add_ints_as_1_0_0(ptr addrspace(1) %a, ptr %b, ptr %c) #0 {
 ; CHECK-LABEL: @add_ints_as_1_0_0(
 ; CHECK-NOT: <4 x i32>
 ; CHECK: ret
@@ -39,13 +39,13 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.01
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %c, i64 %i.01
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.01
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %c, i64 %i.01
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %0, %1
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %i.01
-  store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %i.01
+  store i32 %add, ptr addrspace(1) %arrayidx2, align 4
   %inc = add i64 %i.01, 1
   %cmp = icmp ult i64 %inc, 200
   br i1 %cmp, label %for.body, label %for.end
@@ -54,7 +54,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-define void @add_ints_as_0_1_0(i32* %a, i32 addrspace(1)* %b, i32* %c) #0 {
+define void @add_ints_as_0_1_0(ptr %a, ptr addrspace(1) %b, ptr %c) #0 {
 ; CHECK-LABEL: @add_ints_as_0_1_0(
 ; CHECK-NOT: <4 x i32>
 ; CHECK: ret
@@ -64,13 +64,13 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
-  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %c, i64 %i.01
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %b, i64 %i.01
+  %0 = load i32, ptr addrspace(1) %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %c, i64 %i.01
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %0, %1
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.01
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %i.01
+  store i32 %add, ptr %arrayidx2, align 4
   %inc = add i64 %i.01, 1
   %cmp = icmp ult i64 %inc, 200
   br i1 %cmp, label %for.body, label %for.end
@@ -79,7 +79,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-define void @add_ints_as_0_1_1(i32* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #0 {
+define void @add_ints_as_0_1_1(ptr %a, ptr addrspace(1) %b, ptr addrspace(1) %c) #0 {
 ; CHECK-LABEL: @add_ints_as_0_1_1(
 ; CHECK-NOT: <4 x i32>
 ; CHECK: ret
@@ -89,13 +89,13 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
-  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %i.01
-  %1 = load i32, i32 addrspace(1)* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %b, i64 %i.01
+  %0 = load i32, ptr addrspace(1) %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %c, i64 %i.01
+  %1 = load i32, ptr addrspace(1) %arrayidx1, align 4
   %add = add nsw i32 %0, %1
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.01
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %i.01
+  store i32 %add, ptr %arrayidx2, align 4
   %inc = add i64 %i.01, 1
   %cmp = icmp ult i64 %inc, 200
   br i1 %cmp, label %for.body, label %for.end
@@ -104,7 +104,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-define void @add_ints_as_0_1_2(i32* %a, i32 addrspace(1)* %b, i32 addrspace(2)* %c) #0 {
+define void @add_ints_as_0_1_2(ptr %a, ptr addrspace(1) %b, ptr addrspace(2) %c) #0 {
 ; CHECK-LABEL: @add_ints_as_0_1_2(
 ; CHECK-NOT: <4 x i32>
 ; CHECK: ret
@@ -114,13 +114,13 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
-  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(2)* %c, i64 %i.01
-  %1 = load i32, i32 addrspace(2)* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %b, i64 %i.01
+  %0 = load i32, ptr addrspace(1) %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr addrspace(2) %c, i64 %i.01
+  %1 = load i32, ptr addrspace(2) %arrayidx1, align 4
   %add = add nsw i32 %0, %1
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.01
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %i.01
+  store i32 %add, ptr %arrayidx2, align 4
   %inc = add i64 %i.01, 1
   %cmp = icmp ult i64 %inc, 200
   br i1 %cmp, label %for.body, label %for.end

diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll
index a210059ba2330..26bc36df94103 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll
@@ -2,14 +2,14 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-define void @add_ints(i32* nocapture %A, i32* nocapture %B, i32* nocapture %C) {
+define void @add_ints(ptr nocapture %A, ptr nocapture %B, ptr nocapture %C) {
 ; CHECK-LABEL: @add_ints(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK-LABEL: vector.memcheck:
-; CHECK-NEXT:    [[A1:%.*]] = ptrtoint i32* [[A:%.*]] to i64
-; CHECK-NEXT:    [[B2:%.*]] = ptrtoint i32* [[B:%.*]] to i64
-; CHECK-NEXT:    [[C3:%.*]] = ptrtoint i32* [[C:%.*]] to i64
+; CHECK-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B:%.*]] to i64
+; CHECK-NEXT:    [[C3:%.*]] = ptrtoint ptr [[C:%.*]] to i64
 ; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[B2]]
 ; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[A1]], [[C3]]
@@ -25,13 +25,13 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 200

diff --git a/llvm/test/Transforms/LoopVectorize/runtime-drop-crash.ll b/llvm/test/Transforms/LoopVectorize/runtime-drop-crash.ll
index c97433fe8f05a..c76c2c0ef47a2 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-drop-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-drop-crash.ll
@@ -6,23 +6,20 @@
 
 ; CHECK-NOT: vector.body
 
-define void @barney(%struct.foo* %ptr) {
+define void @barney(ptr %ptr) {
 entry:
   br label %loop
 
 loop:
   %tmp3 = phi i64 [ 0, %entry ], [ %tmp18, %loop ]
-  %tmp4 = getelementptr inbounds %struct.foo, %struct.foo* %ptr, i64 undef
-  %tmp5 = bitcast %struct.foo* %tmp4 to i64*
-  store i64 0, i64* %tmp5, align 8
+  %tmp4 = getelementptr inbounds %struct.foo, ptr %ptr, i64 undef
+  store i64 0, ptr %tmp4, align 8
   %tmp8 = add i64 1, %tmp3
-  %tmp10 = getelementptr inbounds %struct.foo, %struct.foo* %ptr, i64 %tmp8
-  %tmp11 = bitcast %struct.foo* %tmp10 to i64*
-  store i64 1, i64* %tmp11, align 8
+  %tmp10 = getelementptr inbounds %struct.foo, ptr %ptr, i64 %tmp8
+  store i64 1, ptr %tmp10, align 8
   %tmp14 = add i64 undef, %tmp3
-  %tmp16 = getelementptr inbounds %struct.foo, %struct.foo* %ptr, i64 %tmp14
-  %tmp17 = bitcast %struct.foo* %tmp16 to i64*
-  store i64 2, i64* %tmp17, align 8
+  %tmp16 = getelementptr inbounds %struct.foo, ptr %ptr, i64 %tmp14
+  store i64 2, ptr %tmp16, align 8
   %tmp18 = add nuw nsw i64 %tmp3, 4
   %c = icmp ult i64 %tmp18, 400
   br i1 %c, label %exit, label %loop

diff --git a/llvm/test/Transforms/LoopVectorize/safegep.ll b/llvm/test/Transforms/LoopVectorize/safegep.ll
index 8d3cf4bbd38b8..47cf1b1d39772 100644
--- a/llvm/test/Transforms/LoopVectorize/safegep.ll
+++ b/llvm/test/Transforms/LoopVectorize/safegep.ll
@@ -11,19 +11,19 @@ target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:
 ; CHECK-LABEL: @safe(
 ; CHECK: <4 x float>
 
-define void @safe(float* %A, float* %B, float %K) {
+define void @safe(ptr %A, ptr %B, float %K) {
 entry:
   br label %"<bb 3>"
 
 "<bb 3>":
   %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
-  %pp3 = getelementptr float, float* %A, i32 %i_15
-  %D.1396_10 = load float, float* %pp3, align 4
-  %pp24 = getelementptr float, float* %B, i32 %i_15
-  %D.1398_15 = load float, float* %pp24, align 4
+  %pp3 = getelementptr float, ptr %A, i32 %i_15
+  %D.1396_10 = load float, ptr %pp3, align 4
+  %pp24 = getelementptr float, ptr %B, i32 %i_15
+  %D.1398_15 = load float, ptr %pp24, align 4
   %D.1399_17 = fadd float %D.1398_15, %K
   %D.1400_18 = fmul float %D.1396_10, %D.1399_17
-  store float %D.1400_18, float* %pp3, align 4
+  store float %D.1400_18, ptr %pp3, align 4
   %i_19 = add nsw i32 %i_15, 1
   %exitcond = icmp ne i32 %i_19, 64
   br i1 %exitcond, label %"<bb 3>", label %return
@@ -37,19 +37,19 @@ return:
 ; CHECK-LABEL: @notsafe(
 ; CHECK-NOT: <4 x float>
 
-define void @notsafe(float addrspace(5) * %A, float* %B, float %K) {
+define void @notsafe(ptr addrspace(5) %A, ptr %B, float %K) {
 entry:
   br label %"<bb 3>"
 
 "<bb 3>":
   %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
-  %pp3 = getelementptr float, float addrspace(5) * %A, i32 %i_15
-  %D.1396_10 = load float, float addrspace(5) * %pp3, align 4
-  %pp24 = getelementptr float, float* %B, i32 %i_15
-  %D.1398_15 = load float, float* %pp24, align 4
+  %pp3 = getelementptr float, ptr addrspace(5) %A, i32 %i_15
+  %D.1396_10 = load float, ptr addrspace(5) %pp3, align 4
+  %pp24 = getelementptr float, ptr %B, i32 %i_15
+  %D.1398_15 = load float, ptr %pp24, align 4
   %D.1399_17 = fadd float %D.1398_15, %K
   %D.1400_18 = fmul float %D.1396_10, %D.1399_17
-  store float %D.1400_18, float addrspace(5) * %pp3, align 4
+  store float %D.1400_18, ptr addrspace(5) %pp3, align 4
   %i_19 = add nsw i32 %i_15, 1
   %exitcond = icmp ne i32 %i_19, 64
   br i1 %exitcond, label %"<bb 3>", label %return

diff  --git a/llvm/test/Transforms/LoopVectorize/same-base-access.ll b/llvm/test/Transforms/LoopVectorize/same-base-access.ll
index 49c27f9ea5178..802ed14e9e908 100644
--- a/llvm/test/Transforms/LoopVectorize/same-base-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/same-base-access.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; This is kernel11 from "LivermoreLoops". We can't vectorize it because we
 ; access both x[k] and x[k-1].
 ;
-; void kernel11(double *x, double *y, int n) {
+; void kernel11(ptr x, ptr y, int n) {
 ;   for ( int k=1 ; k<n ; k++ )
 ;     x[k] = x[k-1] + y[k];
 ; }
@@ -13,47 +13,47 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK-LABEL: @kernel11(
 ; CHECK-NOT: <4 x double>
 ; CHECK: ret
-define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
-  %1 = alloca double*, align 8
-  %2 = alloca double*, align 8
+define i32 @kernel11(ptr %x, ptr %y, i32 %n) nounwind uwtable ssp {
+  %1 = alloca ptr, align 8
+  %2 = alloca ptr, align 8
   %3 = alloca i32, align 4
   %k = alloca i32, align 4
-  store double* %x, double** %1, align 8
-  store double* %y, double** %2, align 8
-  store i32 %n, i32* %3, align 4
-  store i32 1, i32* %k, align 4
+  store ptr %x, ptr %1, align 8
+  store ptr %y, ptr %2, align 8
+  store i32 %n, ptr %3, align 4
+  store i32 1, ptr %k, align 4
   br label %4
 
 ; <label>:4                                       ; preds = %25, %0
-  %5 = load i32, i32* %k, align 4
-  %6 = load i32, i32* %3, align 4
+  %5 = load i32, ptr %k, align 4
+  %6 = load i32, ptr %3, align 4
   %7 = icmp slt i32 %5, %6
   br i1 %7, label %8, label %28
 
 ; <label>:8                                       ; preds = %4
-  %9 = load i32, i32* %k, align 4
+  %9 = load i32, ptr %k, align 4
   %10 = sub nsw i32 %9, 1
   %11 = sext i32 %10 to i64
-  %12 = load double*, double** %1, align 8
-  %13 = getelementptr inbounds double, double* %12, i64 %11
-  %14 = load double, double* %13, align 8
-  %15 = load i32, i32* %k, align 4
+  %12 = load ptr, ptr %1, align 8
+  %13 = getelementptr inbounds double, ptr %12, i64 %11
+  %14 = load double, ptr %13, align 8
+  %15 = load i32, ptr %k, align 4
   %16 = sext i32 %15 to i64
-  %17 = load double*, double** %2, align 8
-  %18 = getelementptr inbounds double, double* %17, i64 %16
-  %19 = load double, double* %18, align 8
+  %17 = load ptr, ptr %2, align 8
+  %18 = getelementptr inbounds double, ptr %17, i64 %16
+  %19 = load double, ptr %18, align 8
   %20 = fadd double %14, %19
-  %21 = load i32, i32* %k, align 4
+  %21 = load i32, ptr %k, align 4
   %22 = sext i32 %21 to i64
-  %23 = load double*, double** %1, align 8
-  %24 = getelementptr inbounds double, double* %23, i64 %22
-  store double %20, double* %24, align 8
+  %23 = load ptr, ptr %1, align 8
+  %24 = getelementptr inbounds double, ptr %23, i64 %22
+  store double %20, ptr %24, align 8
   br label %25
 
 ; <label>:25                                      ; preds = %8
-  %26 = load i32, i32* %k, align 4
+  %26 = load i32, ptr %k, align 4
   %27 = add nsw i32 %26, 1
-  store i32 %27, i32* %k, align 4
+  store i32 %27, ptr %k, align 4
   br label %4
 
 ; <label>:28                                      ; preds = %4
@@ -77,14 +77,14 @@ define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: @func2(
 ; CHECK: <4 x i32>
 ; CHECK: ret
-define i32 @func2(i32* nocapture %a) nounwind uwtable ssp {
+define i32 @func2(ptr nocapture %a) nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %7, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %7 ]
   %2 = mul nsw i64 %indvars.iv, 7
-  %3 = getelementptr inbounds i32, i32* %a, i64 %2
-  %4 = load i32, i32* %3, align 4
+  %3 = getelementptr inbounds i32, ptr %a, i64 %2
+  %4 = load i32, ptr %3, align 4
   %5 = icmp sgt i32 %4, 3
   br i1 %5, label %6, label %7
 
@@ -96,7 +96,7 @@ define i32 @func2(i32* nocapture %a) nounwind uwtable ssp {
 ; <label>:7                                       ; preds = %6, %1
   %x.0 = phi i32 [ %tmp1, %6 ], [ %4, %1 ]
   %8 = add nsw i32 %x.0, 3
-  store i32 %8, i32* %3, align 4
+  store i32 %8, ptr %3, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 256

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
index 416e5075f6d9b..f4c099ff9aa11 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -scalable-vectorization=on -force-target-supports-scalable-vectors=true -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=2  -S | FileCheck %s
 
-define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) {
+define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
 ; CHECK-LABEL: @test1(
 ; CHECK:       vector.body:
 ; CHECK:         [[FCMP1:%.*]] = fcmp ogt <vscale x 2 x float>
@@ -14,13 +14,13 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp1 = fcmp ogt float %0, 1.000000e+02
   tail call void @llvm.assume(i1 %cmp1)
   %add = fadd float %0, 1.000000e+00
-  %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv, 1599
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
@@ -33,9 +33,9 @@ declare void @llvm.assume(i1) #0
 
 attributes #0 = { nounwind willreturn }
 
-%struct.data = type { float*, float* }
+%struct.data = type { ptr, ptr }
 
-define void @test2(float *%a, float *%b) {
+define void @test2(ptr %a, ptr %b) {
 ; CHECK-LABEL: @test2(
 ; CHECK:       entry:
 ; CHECK:         [[MASKCOND:%.*]] = icmp eq i64 %ptrint1, 0
@@ -46,9 +46,9 @@ define void @test2(float *%a, float *%b) {
 ; CHECK:         tail call void @llvm.assume(i1 [[MASKCOND4]])
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[MASKCOND4]])
 entry:
-  %ptrint1 = ptrtoint float* %a to i64
+  %ptrint1 = ptrtoint ptr %a to i64
   %maskcond = icmp eq i64 %ptrint1, 0
-  %ptrint2 = ptrtoint float* %b to i64
+  %ptrint2 = ptrtoint ptr %b to i64
   %maskcond4 = icmp eq i64 %ptrint2, 0
   br label %for.body
 
@@ -56,12 +56,12 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   tail call void @llvm.assume(i1 %maskcond)
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %add = fadd float %0, 1.000000e+00
   tail call void @llvm.assume(i1 %maskcond4)
-  %arrayidx5 = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv, 1599
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
@@ -73,7 +73,7 @@ for.end:                                          ; preds = %for.body
 ; Test case for PR43620. Make sure we can vectorize with predication in presence
 ; of assume calls. For now, check that we drop all assumes in predicated blocks
 ; in the vector body.
-define void @predicated_assume(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %n) {
+define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %n) {
 ; Check that the vector.body does not contain any assumes.
 ; CHECK-LABEL: @predicated_assume(
 ; CHECK:       vector.body:
@@ -94,11 +94,11 @@ if.else:                                          ; preds = %for.body
 
 if.end5:                                          ; preds = %for.body, %if.else
   %x.0 = phi float [ 4.200000e+01, %if.else ], [ 2.300000e+01, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %mul = fmul float %x.0, %0
-  %arrayidx7 = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  store float %mul, float* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  store float %mul, ptr %arrayidx7, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %cmp = icmp eq i64 %indvars.iv.next, %n
   br i1 %cmp, label %for.cond.cleanup, label %for.body, !llvm.loop !0

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
index 113e008f52455..630a17ece9f32 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -6,7 +6,7 @@
 ;     b[i] =  a[i] + a[i - 1]
 ; }
 ;
-define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) {
+define void @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
 ; CHECK-VF4UF1-LABEL: @recurrence_1
 ; CHECK-VF4UF1: for.preheader
 ; CHECK-VF4UF1: %[[SUB_1:.*]] = add i32 %n, -1
@@ -20,7 +20,7 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n)
 ; CHECK-VF4UF1: vector.body:
 ; CHECK-VF4UF1: %[[INDEX:.*]] = phi i64 [ 0, %vector.ph ], [ %[[NEXT_IDX:.*]], %vector.body ]
 ; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[VEC_RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
-; CHECK-VF4UF1: %[[LOAD]] = load <vscale x 4 x i32>, <vscale x 4 x i32>*
+; CHECK-VF4UF1: %[[LOAD]] = load <vscale x 4 x i32>, ptr
 ; CHECK-VF4UF1: %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VEC_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
 ; CHECK-VF4UF1: middle.block:
 ; CHECK-VF4UF1: %[[VSCALE2:.*]] = call i32 @llvm.vscale.i32()
@@ -35,19 +35,18 @@ entry:
   br label %for.preheader
 
 for.preheader:
-  %arrayidx.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 0
-  %pre_load = load i32, i32* %arrayidx.phi.trans.insert
+  %pre_load = load i32, ptr %a
   br label %scalar.body
 
 scalar.body:
   %0 = phi i32 [ %pre_load, %for.preheader ], [ %1, %scalar.body ]
   %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx32 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-  %1 = load i32, i32* %arrayidx32
-  %arrayidx34 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %arrayidx32 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.next
+  %1 = load i32, ptr %arrayidx32
+  %arrayidx34 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
   %add35 = add i32 %1, %0
-  store i32 %add35, i32* %arrayidx34
+  store i32 %add35, ptr %arrayidx34
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
   br i1 %exitcond, label %for.exit, label %scalar.body, !llvm.loop !0
@@ -63,7 +62,7 @@ for.exit:
 ;   return minmax;
 ; }
 ;
-define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
+define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
 ; CHECK-VF4UF1-LABEL: @recurrence_2
 ; CHECK-VF4UF1: vector.ph:
 ; CHECK-VF4UF1: %[[VSCALE1:.*]] = call i32 @llvm.vscale.i32()
@@ -72,7 +71,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
 ; CHECK-VF4UF1: %[[VEC_RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %.pre, i32 %[[SUB1]]
 ; CHECK-VF4UF1: vector.body:
 ; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[VEC_RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
-; CHECK-VF4UF1: %[[LOAD]] = load <vscale x 4 x i32>, <vscale x 4 x i32>*
+; CHECK-VF4UF1: %[[LOAD]] = load <vscale x 4 x i32>, ptr
 ; CHECK-VF4UF1: %[[REVERSE:.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VEC_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
 ; CHECK-VF4UF1: middle.block:
 ; CHECK-VF4UF1: %[[VSCALE2:.*]] = call i32 @llvm.vscale.i32()
@@ -84,8 +83,8 @@ entry:
   br i1 %cmp27, label %for.preheader, label %for.cond.cleanup
 
 for.preheader:
-  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 -1
-  %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
+  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, ptr %a, i64 -1
+  %.pre = load i32, ptr %arrayidx2.phi.trans.insert, align 4
   br label %scalar.body
 
 for.cond.cleanup.loopexit:
@@ -100,8 +99,8 @@ scalar.body:
   %0 = phi i32 [ %.pre, %for.preheader ], [ %1, %scalar.body ]
   %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
   %minmax.028 = phi i32 [ undef, %for.preheader ], [ %minmax.0.cond, %scalar.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx, align 4
   %sub3 = sub nsw i32 %1, %0
   %cmp4 = icmp sgt i32 %sub3, 0
   %cond = select i1 %cmp4, i32 %sub3, i32 0
@@ -113,7 +112,7 @@ scalar.body:
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %scalar.body, !llvm.loop !0
 }
 
-define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 %n, float %f, i16 %p) {
+define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, float %f, i16 %p) {
 ; CHECK-VF4UF1: vector.ph:
 ; CHECK-VF4UF1: %[[VSCALE1:.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF4UF1: %[[MUL1:.*]] = mul i32 %[[VSCALE1]], 4
@@ -121,7 +120,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32
 ; CHECK-VF4UF1: %vector.recur.init = insertelement <vscale x 4 x i16> poison, i16 %0, i32 %[[SUB1]]
 ; CHECK-VF4UF1: vector.body:
 ; CHECK-VF4UF1: %vector.recur = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[L1:.*]], %vector.body ]
-; CHECK-VF4UF1: %[[L1]] = load <vscale x 4 x i16>, <vscale x 4 x i16>*
+; CHECK-VF4UF1: %[[L1]] = load <vscale x 4 x i16>, ptr
 ; CHECK-VF4UF1: %[[SPLICE:.*]] = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> %vector.recur, <vscale x 4 x i16> %[[L1]], i32 -1)
 ; Check also that the casts were not moved needlessly.
 ; CHECK-VF4UF1: sitofp <vscale x 4 x i16> %[[L1]] to <vscale x 4 x double>
@@ -132,13 +131,13 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32
 ; CHECK-VF4UF1: %[[SUB2:.*]] = sub i32 %[[MUL2]], 1
 ; CHECK-VF4UF1: %vector.recur.extract = extractelement <vscale x 4 x i16> %[[L1]], i32 %[[SUB2]]
 entry:
-  %0 = load i16, i16* %a, align 2
+  %0 = load i16, ptr %a, align 2
   %conv = sitofp i16 %0 to double
   %conv1 = fpext float %f to double
   %conv2 = sitofp i16 %p to double
   %mul = fmul fast double %conv2, %conv1
   %sub = fsub fast double %conv, %mul
-  store double %sub, double* %b, align 8
+  store double %sub, ptr %b, align 8
   %cmp25 = icmp sgt i32 %n, 1
   br i1 %cmp25, label %for.preheader, label %for.end
 
@@ -148,14 +147,14 @@ for.preheader:
 scalar.body:
   %1 = phi i16 [ %0, %for.preheader ], [ %2, %scalar.body ]
   %iv = phi i64 [ %iv.next, %scalar.body ], [ 1, %for.preheader ]
-  %arrayidx5 = getelementptr inbounds i16, i16* %a, i64 %iv
-  %2 = load i16, i16* %arrayidx5, align 2
+  %arrayidx5 = getelementptr inbounds i16, ptr %a, i64 %iv
+  %2 = load i16, ptr %arrayidx5, align 2
   %conv6 = sitofp i16 %2 to double
   %conv11 = sitofp i16 %1 to double
   %mul12 = fmul fast double %conv11, %conv1
   %sub13 = fsub fast double %conv6, %mul12
-  %arrayidx15 = getelementptr inbounds double, double* %b, i64 %iv
-  store double %sub13, double* %arrayidx15, align 8
+  %arrayidx15 = getelementptr inbounds double, ptr %b, i64 %iv
+  store double %sub13, ptr %arrayidx15, align 8
   %iv.next = add nuw nsw i64 %iv, 1
   %lftr.wideiv = trunc i64 %iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -195,7 +194,7 @@ for.end:
 ; another at the second last index. We need these 2 extracts because
 ; the first order recurrence phi is used outside the loop, so we require the phi
 ; itself and not its update (addx).
-define i32 @extract_second_last_iteration(i32* %cval, i32 %x)  {
+define i32 @extract_second_last_iteration(ptr %cval, i32 %x)  {
 ; CHECK-VF4UF2-LABEL: @extract_second_last_iteration
 ; CHECK-VF4UF2: vector.ph
 ; CHECK-VF4UF2: call i32 @llvm.vscale.i32()
@@ -239,20 +238,20 @@ for.end:
 
 ; void sink_after(short *a, int n, int *b) {
 ;   for(int i = 0; i < n; i++)
-;     b[i] = (a[i] * a[i + 1]);
+;     b[i] = (aptr a[i + 1]);
 ; }
 
 ; Check that the sext sank after the load in the vector loop.
-define void @sink_after(i16* %a, i32* %b, i64 %n) {
+define void @sink_after(ptr %a, ptr %b, i64 %n) {
 ; CHECK-VF4UF1-LABEL: @sink_after
 ; CHECK-VF4UF1: vector.body
 ; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
-; CHECK-VF4UF1: %[[LOAD]] = load <vscale x 4 x i16>, <vscale x 4 x i16>*
+; CHECK-VF4UF1: %[[LOAD]] = load <vscale x 4 x i16>, ptr
 ; CHECK-VF4UF1-NEXT: %[[SPLICE:.*]] = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> %[[VEC_RECUR]], <vscale x 4 x i16> %[[LOAD]], i32 -1)
 ; CHECK-VF4UF1-NEXT: sext <vscale x 4 x i16> %[[SPLICE]] to <vscale x 4 x i32>
 ; CHECK-VF4UF1-NEXT: sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
 entry:
-  %.pre = load i16, i16* %a
+  %.pre = load i16, ptr %a
   br label %for.body
 
 for.body:
@@ -260,12 +259,12 @@ for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %conv = sext i16 %0 to i32
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx2 = getelementptr inbounds i16, i16* %a, i64 %indvars.iv.next
-  %1 = load i16, i16* %arrayidx2
+  %arrayidx2 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv.next
+  %1 = load i16, ptr %arrayidx2
   %conv3 = sext i16 %1 to i32
   %mul = mul nsw i32 %conv3, %conv
-  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  store i32 %mul, i32* %arrayidx5
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  store i32 %mul, ptr %arrayidx5
   %exitcond = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
 

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
index 02e6eebd18d58..f932164b9e2a9 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
@@ -7,7 +7,7 @@
 ;   }
 ; with an unroll factor (interleave count) of 2.
 
-define void @add_ind64_unrolled(i64* noalias nocapture %a, i64* noalias nocapture readonly %b, i64 %n) {
+define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @add_ind64_unrolled(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -29,26 +29,22 @@ define void @add_ind64_unrolled(i64* noalias nocapture %a, i64* noalias nocaptur
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP8]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP9]], 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[TMP7]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i64* [[TMP12]] to <vscale x 2 x i64>*
-; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP13]], align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP11]]
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <vscale x 2 x i64>, ptr [[TMP12]], align 8
 ; CHECK-NEXT:    [[TMP14:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_LOAD]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_LOAD2]], [[STEP_ADD]]
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i64* [[TMP16]] to <vscale x 2 x i64>*
-; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP14]], <vscale x 2 x i64>* [[TMP17]], align 8
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP14]], ptr [[TMP16]], align 8
 ; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP19:%.*]] = shl i32 [[TMP18]], 1
 ; CHECK-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, i64* [[TMP16]], i64 [[TMP20]]
-; CHECK-NEXT:    [[TMP22:%.*]] = bitcast i64* [[TMP21]] to <vscale x 2 x i64>*
-; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP15]], <vscale x 2 x i64>* [[TMP22]], align 8
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i64 [[TMP20]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP15]], ptr [[TMP21]], align 8
 ; CHECK-NEXT:    [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP24:%.*]] = shl i64 [[TMP23]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP24]]
@@ -63,11 +59,11 @@ define void @add_ind64_unrolled(i64* noalias nocapture %a, i64* noalias nocaptur
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_08]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP26]], [[I_08]]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I_08]]
-; CHECK-NEXT:    store i64 [[ADD]], i64* [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_08]]
+; CHECK-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -79,11 +75,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %b, i64 %i.08
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %b, i64 %i.08
+  %0 = load i64, ptr %arrayidx, align 8
   %add = add nsw i64 %0, %i.08
-  %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 %i.08
-  store i64 %add, i64* %arrayidx1, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %a, i64 %i.08
+  store i64 %add, ptr %arrayidx1, align 8
   %inc = add nuw nsw i64 %i.08, 1
   %exitcond.not = icmp eq i64 %inc, %n
   br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !0
@@ -95,7 +91,7 @@ exit:                                 ; preds = %for.body
 
 ; Same as above, except we test with a vectorisation factor of (1, scalable)
 
-define void @add_ind64_unrolled_nxv1i64(i64* noalias nocapture %a, i64* noalias nocapture readonly %b, i64 %n) {
+define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @add_ind64_unrolled_nxv1i64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -116,24 +112,20 @@ define void @add_ind64_unrolled_nxv1i64(i64* noalias nocapture %a, i64* noalias
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[TMP6]] to <vscale x 1 x i64>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, <vscale x 1 x i64>* [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[TMP6]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <vscale x 1 x i64>*
-; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <vscale x 1 x i64>, <vscale x 1 x i64>* [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP9]]
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <vscale x 1 x i64>, ptr [[TMP10]], align 8
 ; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <vscale x 1 x i64> [[WIDE_LOAD]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add nsw <vscale x 1 x i64> [[WIDE_LOAD2]], [[STEP_ADD]]
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i64* [[TMP14]] to <vscale x 1 x i64>*
-; CHECK-NEXT:    store <vscale x 1 x i64> [[TMP12]], <vscale x 1 x i64>* [[TMP15]], align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <vscale x 1 x i64> [[TMP12]], ptr [[TMP14]], align 8
 ; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, i64* [[TMP14]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64* [[TMP18]] to <vscale x 1 x i64>*
-; CHECK-NEXT:    store <vscale x 1 x i64> [[TMP13]], <vscale x 1 x i64>* [[TMP19]], align 8
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i64 [[TMP17]]
+; CHECK-NEXT:    store <vscale x 1 x i64> [[TMP13]], ptr [[TMP18]], align 8
 ; CHECK-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP21:%.*]] = shl i64 [[TMP20]], 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]]
@@ -148,11 +140,11 @@ define void @add_ind64_unrolled_nxv1i64(i64* noalias nocapture %a, i64* noalias
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_08]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP23]], [[I_08]]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I_08]]
-; CHECK-NEXT:    store i64 [[ADD]], i64* [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_08]]
+; CHECK-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -164,11 +156,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i64, i64* %b, i64 %i.08
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %b, i64 %i.08
+  %0 = load i64, ptr %arrayidx, align 8
   %add = add nsw i64 %0, %i.08
-  %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 %i.08
-  store i64 %add, i64* %arrayidx1, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %a, i64 %i.08
+  store i64 %add, ptr %arrayidx1, align 8
   %inc = add nuw nsw i64 %i.08, 1
   %exitcond.not = icmp eq i64 %inc, %n
   br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !9
@@ -187,7 +179,7 @@ exit:                                 ; preds = %for.body
 ; with an unroll factor (interleave count) of 1.
 
 
-define void @add_unique_ind32(i32* noalias nocapture %a, i64 %n) {
+define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-LABEL: @add_unique_ind32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -211,9 +203,8 @@ define void @add_unique_ind32(i32* noalias nocapture %a, i64 %n) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC_IND]], <vscale x 4 x i32>* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl i64 [[TMP10]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
@@ -230,8 +221,8 @@ define void @add_unique_ind32(i32* noalias nocapture %a, i64 %n) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_08]]
-; CHECK-NEXT:    store i32 [[R_07]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_08]]
+; CHECK-NEXT:    store i32 [[R_07]], ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[R_07]], 2
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
@@ -245,8 +236,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %r.07 = phi i32 [ %add, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i.08
-  store i32 %r.07, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %i.08
+  store i32 %r.07, ptr %arrayidx, align 4
   %add = add nuw nsw i32 %r.07, 2
   %inc = add nuw nsw i64 %i.08, 1
   %exitcond.not = icmp eq i64 %inc, %n
@@ -265,7 +256,7 @@ exit:                                 ; preds = %for.body
 ;   }
 ; with an unroll factor (interleave count) of 1.
 
-define void @add_unique_indf32(float* noalias nocapture %a, i64 %n) {
+define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-LABEL: @add_unique_indf32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -294,9 +285,8 @@ define void @add_unique_indf32(float* noalias nocapture %a, i64 %n) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP12]] to <vscale x 4 x float>*
-; CHECK-NEXT:    store <vscale x 4 x float> [[VEC_IND]], <vscale x 4 x float>* [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <vscale x 4 x float> [[VEC_IND]], ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP15:%.*]] = shl i64 [[TMP14]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]]
@@ -313,8 +303,8 @@ define void @add_unique_indf32(float* noalias nocapture %a, i64 %n) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_07:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I_08]]
-; CHECK-NEXT:    store float [[R_07]], float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I_08]]
+; CHECK-NEXT:    store float [[R_07]], ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ADD]] = fadd float [[R_07]], 2.000000e+00
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
@@ -328,8 +318,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %r.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %i.08
-  store float %r.07, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %i.08
+  store float %r.07, ptr %arrayidx, align 4
   %add = fadd float %r.07, 2.000000e+00
   %inc = add nuw nsw i64 %i.08, 1
   %exitcond.not = icmp eq i64 %inc, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
index b703cedcddfd0..2b1c32cd144ce 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
@@ -15,13 +15,11 @@
 
 ; CHECKUF1: vector.body:
 ; CHECKUF1: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECKUF1: %[[IDXB:.*]] = getelementptr inbounds double, double* %b, i64 %index
-; CHECKUF1: %[[IDXB_CAST:.*]] = bitcast double* %[[IDXB]] to <vscale x 4 x double>*
-; CHECKUF1: %wide.load = load <vscale x 4 x double>, <vscale x 4 x double>* %[[IDXB_CAST]], align 8
+; CHECKUF1: %[[IDXB:.*]] = getelementptr inbounds double, ptr %b, i64 %index
+; CHECKUF1: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8
 ; CHECKUF1: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, double* %a, i64 %index
-; CHECKUF1: %[[IDXA_CAST:.*]] = bitcast double* %[[IDXA]] to <vscale x 4 x double>*
-; CHECKUF1: store <vscale x 4 x double> %[[FADD]], <vscale x 4 x double>* %[[IDXA_CAST]], align 8
+; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index
+; CHECKUF1: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8
 ; CHECKUF1: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
 ; CHECKUF1: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2
 ; CHECKUF1: %index.next = add nuw i64 %index, %[[VSCALEX4]]
@@ -46,33 +44,29 @@
 
 ; CHECKUF2: vector.body:
 ; CHECKUF2: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECKUF2: %[[IDXB:.*]] = getelementptr inbounds double, double* %b, i64 %index
-; CHECKUF2: %[[IDXB_CAST:.*]] = bitcast double* %[[IDXB]] to <vscale x 4 x double>*
-; CHECKUF2: %wide.load = load <vscale x 4 x double>, <vscale x 4 x double>* %[[IDXB_CAST]], align 8
+; CHECKUF2: %[[IDXB:.*]] = getelementptr inbounds double, ptr %b, i64 %index
+; CHECKUF2: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8
 ; CHECKUF2: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32()
 ; CHECKUF2: %[[VSCALE2:.*]] = shl i32 %[[VSCALE]], 2
 ; CHECKUF2: %[[VSCALE2_EXT:.*]] = sext i32 %[[VSCALE2]] to i64
-; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds double, double* %[[IDXB]], i64 %[[VSCALE2_EXT]]
-; CHECKUF2: %[[IDXB_NEXT_CAST:.*]] = bitcast double* %[[IDXB_NEXT]] to <vscale x 4 x double>*
-; CHECKUF2: %wide.load{{[0-9]+}} = load <vscale x 4 x double>, <vscale x 4 x double>* %[[IDXB_NEXT_CAST]], align 8
+; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds double, ptr %[[IDXB]], i64 %[[VSCALE2_EXT]]
+; CHECKUF2: %wide.load{{[0-9]+}} = load <vscale x 4 x double>, ptr %[[IDXB_NEXT]], align 8
 ; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, double* %a, i64 %index
-; CHECKUF2: %[[IDXA_CAST:.*]] = bitcast double* %[[IDXA]] to <vscale x 4 x double>*
-; CHECKUF2: store <vscale x 4 x double> %[[FADD]], <vscale x 4 x double>* %[[IDXA_CAST]], align 8
+; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index
+; CHECKUF2: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8
 ; CHECKUF2: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32()
 ; CHECKUF2: %[[VSCALE2:.*]] = shl i32 %[[VSCALE]], 2
 ; CHECKUF2: %[[VSCALE2_EXT:.*]] = sext i32 %[[VSCALE2]] to i64
-; CHECKUF2: %[[IDXA_NEXT:.*]] = getelementptr inbounds double, double* %[[IDXA]], i64 %[[VSCALE2_EXT]]
-; CHECKUF2: %[[IDXA_NEXT_CAST:.*]] = bitcast double* %[[IDXA_NEXT]] to <vscale x 4 x double>*
-; CHECKUF2: store <vscale x 4 x double> %[[FADD_NEXT]], <vscale x 4 x double>* %[[IDXA_NEXT_CAST]], align 8
+; CHECKUF2: %[[IDXA_NEXT:.*]] = getelementptr inbounds double, ptr %[[IDXA]], i64 %[[VSCALE2_EXT]]
+; CHECKUF2: store <vscale x 4 x double> %[[FADD_NEXT]], ptr %[[IDXA_NEXT]], align 8
 ; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
 ; CHECKUF2: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3
 ; CHECKUF2: %index.next = add nuw i64 %index, %[[VSCALEX8]]
 ; CHECKUF2: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec
 ; CHECKUF2: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !0
 
-define void @loop(i32 %N, double* nocapture %a, double* nocapture readonly %b) {
+define void @loop(i32 %N, ptr nocapture %a, ptr nocapture readonly %b) {
 entry:
   %cmp7 = icmp sgt i32 %N, 0
   br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
@@ -86,11 +80,11 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %b, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
   %add = fadd double %0, 1.000000e+00
-  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
-  store double %add, double* %arrayidx2, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+  store double %add, ptr %arrayidx2, align 8
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/scalable-noalias-scope-decl.ll
index c18a03fcb7989..7d82daed346ca 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-noalias-scope-decl.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-noalias-scope-decl.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -scalable-vectorization=on -force-target-supports-scalable-vectors=true -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2  -S | FileCheck %s
 
-define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) {
+define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
 entry:
   br label %for.body
 
@@ -15,13 +15,13 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %cmp1 = fcmp ogt float %0, 1.000000e+02
   tail call void @llvm.experimental.noalias.scope.decl(metadata !0)
   %add = fadd float %0, 1.000000e+00
-  %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv, 1599
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
@@ -32,9 +32,9 @@ for.end:                                          ; preds = %for.body
 
 declare void @llvm.experimental.noalias.scope.decl(metadata)
 
-%struct.data = type { float*, float* }
+%struct.data = type { ptr, ptr }
 
-define void @test2(float* %a, float* %b) {
+define void @test2(ptr %a, ptr %b) {
 ; CHECK-LABEL: @test2
 ; CHECK: vector.body:
 ; CHECK: @llvm.experimental.noalias.scope.decl(metadata [[SCOPE0_LIST:!.*]])
@@ -46,21 +46,21 @@ define void @test2(float* %a, float* %b) {
 ; CHECK-NOT: @llvm.experimental.noalias.scope.decl
 ; CHECK: ret void
 entry:
-  %ptrint = ptrtoint float* %b to i64
+  %ptrint = ptrtoint ptr %b to i64
   %maskcond = icmp eq i64 %ptrint, 0
-  %ptrint2 = ptrtoint float* %a to i64
+  %ptrint2 = ptrtoint ptr %a to i64
   %maskcond4 = icmp eq i64 %ptrint2, 0
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   tail call void @llvm.experimental.noalias.scope.decl(metadata !0)
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %add = fadd float %0, 1.000000e+00
   tail call void @llvm.experimental.noalias.scope.decl(metadata !4)
-  %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv, 1599
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
@@ -69,7 +69,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-define void @predicated_noalias_scope_decl(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %n) {
+define void @predicated_noalias_scope_decl(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %n) {
 
 ; Check that the vector.body still contains a llvm.experimental.noalias.scope.decl
 
@@ -99,11 +99,11 @@ if.else:                                          ; preds = %for.body
 
 if.end5:                                          ; preds = %for.body, %if.else
   %x.0 = phi float [ 4.200000e+01, %if.else ], [ 2.300000e+01, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %mul = fmul float %x.0, %0
-  %arrayidx7 = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  store float %mul, float* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  store float %mul, ptr %arrayidx7, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %cmp = icmp eq i64 %indvars.iv.next, %n
   br i1 %cmp, label %for.cond.cleanup, label %for.body, !llvm.loop !5

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
index 2fb10e6d36fc4..a1c30894e8421 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -3,7 +3,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
+define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_add_trunc(
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
@@ -11,8 +11,8 @@ define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP36:%.*]], %vector.body ]
 ; CHECK:         [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, <vscale x 8 x i8>*
-; CHECK:         [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, <vscale x 8 x i8>*
+; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr
+; CHECK:         [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr
 ; CHECK-NEXT:    [[TMP26:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
 ; CHECK-NEXT:    [[TMP27:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
 ; CHECK-NEXT:    [[TMP28:%.*]] = add <vscale x 8 x i32> [[TMP14]], [[TMP26]]
@@ -39,8 +39,8 @@ loop:                                           ; preds = %entry, %loop
   %indvars.iv = phi i32 [ %indvars.iv.next, %loop ], [ 0, %entry ]
   %sum.02p = phi i32 [ %l9, %loop ], [ 255, %entry ]
   %sum.02 = and i32 %sum.02p, 255
-  %l2 = getelementptr inbounds i8, i8* %A, i32 %indvars.iv
-  %l3 = load i8, i8* %l2, align 4
+  %l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
+  %l3 = load i8, ptr %l2, align 4
   %l3e = zext i8 %l3 to i32
   %l9 = add i32 %sum.02, %l3e
   %indvars.iv.next = add i32 %indvars.iv, 1

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
index 30455b525b457..a99f875b413e7 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=loop-vectorize,instsimplify -scalable-vectorization=on -force-target-supports-scalable-vectors -S | FileCheck %s
 
-define void @trunc_minimal_bitwidth(i8* %bptr, i16* noalias %hptr, i32 %val, i64 %N) {
+define void @trunc_minimal_bitwidth(ptr %bptr, ptr noalias %hptr, i32 %val, i64 %N) {
 ; CHECK-LABEL: @trunc_minimal_bitwidth(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -19,9 +19,8 @@ define void @trunc_minimal_bitwidth(i8* %bptr, i16* noalias %hptr, i32 %val, i64
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc <vscale x 4 x i32> [[BROADCAST_SPLAT2]] to <vscale x 4 x i16>
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[HPTR:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <vscale x 4 x i16>*
-; CHECK-NEXT:    store <vscale x 4 x i16> [[TMP4]], <vscale x 4 x i16>* [[TMP6]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[HPTR:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <vscale x 4 x i16> [[TMP4]], ptr [[TMP5]], align 2
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
@@ -36,8 +35,8 @@ define void @trunc_minimal_bitwidth(i8* %bptr, i16* noalias %hptr, i32 %val, i64
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[CONV21:%.*]] = trunc i32 [[VAL]] to i16
-; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[HPTR]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i16 [[CONV21]], i16* [[ARRAYIDX23]], align 2
+; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[HPTR]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i16 [[CONV21]], ptr [[ARRAYIDX23]], align 2
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -49,11 +48,11 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %0 = load i8, i8* %bptr, align 1
+  %0 = load i8, ptr %bptr, align 1
   %conv = zext i8 %0 to i32
   %conv21 = trunc i32 %val to i16
-  %arrayidx23 = getelementptr inbounds i16, i16* %hptr, i64 %indvars.iv
-  store i16 %conv21, i16* %arrayidx23, align 2
+  %arrayidx23 = getelementptr inbounds i16, ptr %hptr, i64 %indvars.iv
+  store i16 %conv21, ptr %arrayidx23, align 2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %N
   br i1 %exitcond.not, label %for.exit, label %for.body, !llvm.loop !0
@@ -62,7 +61,7 @@ for.exit:
   ret void
 }
 
-define void @trunc_minimal_bitwidths_shufflevector (i8* %p, i32 %arg1, i64 %len) {
+define void @trunc_minimal_bitwidths_shufflevector (ptr %p, i32 %arg1, i64 %len) {
 ; CHECK-LABEL: @trunc_minimal_bitwidths_shufflevector(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -79,14 +78,12 @@ define void @trunc_minimal_bitwidths_shufflevector (i8* %p, i32 %arg1, i64 %len)
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <vscale x 4 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP5]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP4]], align 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <vscale x 4 x i32> [[BROADCAST_SPLAT]] to <vscale x 4 x i8>
 ; CHECK-NEXT:    [[TMP7:%.*]] = xor <vscale x 4 x i8> [[WIDE_LOAD]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <vscale x 4 x i8> [[TMP7]], [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP4]] to <vscale x 4 x i8>*
-; CHECK-NEXT:    store <vscale x 4 x i8> [[TMP8]], <vscale x 4 x i8>* [[TMP9]], align 1
+; CHECK-NEXT:    store <vscale x 4 x i8> [[TMP8]], ptr [[TMP4]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
@@ -97,13 +94,13 @@ entry:
 
 for.body:                                         ; preds = %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx
+  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx
   %conv = zext i8 %0 to i32
   %conv17 = xor i32 %conv, %arg1
   %mul18 = mul nuw nsw i32 %conv17, %conv
   %conv19 = trunc i32 %mul18 to i8
-  store i8 %conv19, i8* %arrayidx
+  store i8 %conv19, ptr %arrayidx
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %len
   br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !0

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll
index 76712b5817ae3..e51e49e1cde7c 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll
@@ -6,20 +6,20 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK: LV: User VF=vscale x 4 is ignored because scalable vectors are not available.
 ; CHECK: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is ignored because the target does not support scalable vectors. The compiler will pick a more suitable value.
 ; CHECK: LV: The Widest register safe to use is: 32 bits.
-define void @test1(i32* %a, i32* %b) {
+define void @test1(ptr %a, ptr %b) {
 entry:
   br label %loop
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %2 = add nuw nsw i64 %iv, 4
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0

diff --git a/llvm/test/Transforms/LoopVectorize/scalar-select.ll b/llvm/test/Transforms/LoopVectorize/scalar-select.ll
index 1cabe82c3ce4d..1905c3694d398 100644
--- a/llvm/test/Transforms/LoopVectorize/scalar-select.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalar-select.ll
@@ -17,14 +17,14 @@ define void @example1(i1 %cond) nounwind uwtable ssp {
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %5, %3
-  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %7 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
   %sel = select i1 %cond, i32 %6, i32 zeroinitializer
-  store i32 %sel, i32* %7, align 4
+  store i32 %sel, ptr %7, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 256

diff --git a/llvm/test/Transforms/LoopVectorize/scev-during-mutation.ll b/llvm/test/Transforms/LoopVectorize/scev-during-mutation.ll
index 62d492ea004db..f1307dc7a3481 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-during-mutation.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-during-mutation.ll
@@ -31,7 +31,7 @@ exit:
   ret void
 }
 
-define void @pr49900(i32 %x, i64* %ptr) {
+define void @pr49900(i32 %x, ptr %ptr) {
 ; CHECK-LABEL: @pr49900
 ; CHECK: vector.body{{.*}}:
 ; CHECK: vector.body{{.*}}:
@@ -79,7 +79,7 @@ exit:
 ; CHECK-LABEL: @pr52024(
 ; CHECK: vector.body:
 ;
-define void @pr52024(i32* %dst, i16 %N) {
+define void @pr52024(ptr %dst, i16 %N) {
 entry:
   br label %loop.1
 
@@ -108,8 +108,8 @@ loop.3:
   %sub.phi = phi i16 [ 0, %loop.2.header ], [ %sub, %loop.3 ]
   %sub = sub i16 %sub.phi, %rem.trunc
   %sub.ext = zext i16 %sub to i32
-  %gep.dst = getelementptr i32, i32* %dst, i32 %iv.3
-  store i32 %sub.ext, i32* %gep.dst
+  %gep.dst = getelementptr i32, ptr %dst, i32 %iv.3
+  store i32 %sub.ext, ptr %gep.dst
   %iv.3.next= add nuw nsw i32 %iv.3, 1
   %exitcond.3 = icmp eq i32 %iv.3.next, 34
   br i1 %exitcond.3, label %loop.2.latch, label %loop.3

diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll
index 1c5d4ddd3e037..e8c75635f2c69 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll
@@ -1,17 +1,17 @@
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s --check-prefix=CHECK-VF2IC1
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC2
 
-define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src1, i32* noalias nocapture readonly %src2, i64 %n) {
+define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) {
 ; CHECK-VF2IC1-LABEL: @pred_select_const_i32_from_icmp(
 ; CHECK-VF2IC1:       vector.body:
 ; CHECK-VF2IC1:         [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue2 ]
-; CHECK-VF2IC1:         [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* {{%.*}}, align 4
+; CHECK-VF2IC1:         [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr {{%.*}}, align 4
 ; CHECK-VF2IC1-NEXT:    [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], <i32 35, i32 35>
 ; CHECK-VF2IC1-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
 ; CHECK-VF2IC1-NEXT:    br i1 [[TMP5]], label %pred.load.if, label %pred.load.continue
 ; CHECK-VF2IC1:       pred.load.if:
-; CHECK-VF2IC1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[SRC2:%.*]], i64 {{%.*}}
-; CHECK-VF2IC1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
+; CHECK-VF2IC1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC2:%.*]], i64 {{%.*}}
+; CHECK-VF2IC1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
 ; CHECK-VF2IC1-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
 ; CHECK-VF2IC1-NEXT:    br label %pred.load.continue
 ; CHECK-VF2IC1:       pred.load.continue:
@@ -19,8 +19,8 @@ define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src
 ; CHECK-VF2IC1-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
 ; CHECK-VF2IC1-NEXT:    br i1 [[TMP10]], label %pred.load.if1, label %pred.load.continue2
 ; CHECK-VF2IC1:       pred.load.if1:
-; CHECK-VF2IC1:         [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[SRC2]], i64 {{%.*}}
-; CHECK-VF2IC1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-VF2IC1:         [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 {{%.*}}
+; CHECK-VF2IC1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-VF2IC1-NEXT:    [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
 ; CHECK-VF2IC1-NEXT:    br label %pred.load.continue2
 ; CHECK-VF2IC1:       pred.load.continue2:
@@ -40,11 +40,11 @@ define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src
 ; CHECK-VF2IC1-NEXT:    br label %for.body
 ; CHECK-VF2IC1:       for.body:
 ; CHECK-VF2IC1:         [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], %for.inc ], [ [[BC_MERGE_RDX]], %scalar.ph ]
-; CHECK-VF2IC1:         [[TMP21:%.*]] = load i32, i32* {{%.*}}, align 4
+; CHECK-VF2IC1:         [[TMP21:%.*]] = load i32, ptr {{%.*}}, align 4
 ; CHECK-VF2IC1-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP21]], 35
 ; CHECK-VF2IC1-NEXT:    br i1 [[CMP1]], label %if.then, label %for.inc
 ; CHECK-VF2IC1:       if.then:
-; CHECK-VF2IC1:         [[TMP22:%.*]] = load i32, i32* {{%.*}}, align 4
+; CHECK-VF2IC1:         [[TMP22:%.*]] = load i32, ptr {{%.*}}, align 4
 ; CHECK-VF2IC1-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP22]], 2
 ; CHECK-VF2IC1-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]]
 ; CHECK-VF2IC1-NEXT:    br label %for.inc
@@ -58,23 +58,23 @@ define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src
 ; CHECK-VF1IC2:       vector.body:
 ; CHECK-VF1IC2:         [[VEC_PHI:%.*]] = phi i32 [ 0, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue3 ]
 ; CHECK-VF1IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ 0, %vector.ph ], [ [[PREDPHI5:%.*]], %pred.load.continue3 ]
-; CHECK-VF1IC2:         [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[SRC1:%.*]], i64 {{%.*}}
-; CHECK-VF1IC2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[SRC1]], i64 {{%.*}}
-; CHECK-VF1IC2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-VF1IC2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-VF1IC2:         [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 {{%.*}}
+; CHECK-VF1IC2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 {{%.*}}
+; CHECK-VF1IC2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-VF1IC2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4
 ; CHECK-VF1IC2-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], 35
 ; CHECK-VF1IC2-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], 35
 ; CHECK-VF1IC2-NEXT:    br i1 [[TMP4]], label %pred.load.if, label %pred.load.continue
 ; CHECK-VF1IC2:       pred.load.if:
-; CHECK-VF1IC2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[SRC2:%.*]], i64 {{%.*}}
-; CHECK-VF1IC2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
+; CHECK-VF1IC2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC2:%.*]], i64 {{%.*}}
+; CHECK-VF1IC2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
 ; CHECK-VF1IC2-NEXT:    br label %pred.load.continue
 ; CHECK-VF1IC2:       pred.load.continue:
 ; CHECK-VF1IC2-NEXT:    [[TMP8:%.*]] = phi i32 [ poison, %vector.body ], [ [[TMP7]], %pred.load.if ]
 ; CHECK-VF1IC2-NEXT:    br i1 [[TMP5]], label %pred.load.if2, label %pred.load.continue3
 ; CHECK-VF1IC2:       pred.load.if2:
-; CHECK-VF1IC2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[SRC2]], i64 {{%.*}}
-; CHECK-VF1IC2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
+; CHECK-VF1IC2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 {{%.*}}
+; CHECK-VF1IC2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
 ; CHECK-VF1IC2-NEXT:    br label %pred.load.continue3
 ; CHECK-VF1IC2:       pred.load.continue3:
 ; CHECK-VF1IC2-NEXT:    [[TMP11:%.*]] = phi i32 [ poison, %pred.load.continue ], [ [[TMP10]], %pred.load.if2 ]
@@ -98,11 +98,11 @@ define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src
 ; CHECK-VF1IC2:       for.body:
 ; CHECK-VF1IC2-NEXT:    [[I_013:%.*]] = phi i64 [ [[INC:%.*]], %for.inc ], [ [[BC_RESUME_VAL]], %scalar.ph ]
 ; CHECK-VF1IC2-NEXT:    [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], %for.inc ], [ [[BC_MERGE_RDX]], %scalar.ph ]
-; CHECK-VF1IC2:         [[TMP19:%.*]] = load i32, i32* {{%.*}}, align 4
+; CHECK-VF1IC2:         [[TMP19:%.*]] = load i32, ptr {{%.*}}, align 4
 ; CHECK-VF1IC2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP19]], 35
 ; CHECK-VF1IC2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label %for.inc
 ; CHECK-VF1IC2:       if.then:
-; CHECK-VF1IC2:         [[TMP20:%.*]] = load i32, i32* {{%.*}}, align 4
+; CHECK-VF1IC2:         [[TMP20:%.*]] = load i32, ptr {{%.*}}, align 4
 ; CHECK-VF1IC2-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP20]], 2
 ; CHECK-VF1IC2-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]]
 ; CHECK-VF1IC2-NEXT:    br label %for.inc
@@ -119,14 +119,14 @@ entry:
 for.body:                                         ; preds = %entry, %for.inc
   %i.013 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
   %r.012 = phi i32 [ %r.1, %for.inc ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %src1, i64 %i.013
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %src1, i64 %i.013
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 35
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %arrayidx2 = getelementptr inbounds i32, i32* %src2, i64 %i.013
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %src2, i64 %i.013
+  %1 = load i32, ptr %arrayidx2, align 4
   %cmp3 = icmp eq i32 %1, 2
   %spec.select = select i1 %cmp3, i32 1, i32 %r.012
   br label %for.inc

diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
index 7d3679846e848..49afe952c32db 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4 --check-prefix=CHECK
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4 --check-prefix=CHECK
 
-define i32 @select_const_i32_from_icmp(i32* nocapture readonly %v, i64 %n) {
+define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) {
 ; CHECK-LABEL: @select_const_i32_from_icmp
 ; CHECK-VF4IC1:      vector.body:
 ; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
@@ -70,8 +70,8 @@ entry:
 for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
   %1 = phi i32 [ 3, %entry ], [ %5, %for.body ]
-  %2 = getelementptr inbounds i32, i32* %v, i64 %0
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %v, i64 %0
+  %3 = load i32, ptr %2, align 4
   %4 = icmp eq i32 %3, 3
   %5 = select i1 %4, i32 %1, i32 7
   %6 = add nuw nsw i64 %0, 1
@@ -83,7 +83,7 @@ exit:                                     ; preds = %for.body
 }
 
 
-define i32 @select_const_i32_from_icmp2(i32* nocapture readonly %v, i64 %n) {
+define i32 @select_const_i32_from_icmp2(ptr nocapture readonly %v, i64 %n) {
 ; CHECK-LABEL: @select_const_i32_from_icmp2
 ; CHECK-VF4IC1:      vector.body:
 ; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
@@ -101,8 +101,8 @@ entry:
 for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
   %1 = phi i32 [ 3, %entry ], [ %5, %for.body ]
-  %2 = getelementptr inbounds i32, i32* %v, i64 %0
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %v, i64 %0
+  %3 = load i32, ptr %2, align 4
   %4 = icmp eq i32 %3, 3
   %5 = select i1 %4, i32 7, i32 %1
   %6 = add nuw nsw i64 %0, 1
@@ -114,7 +114,7 @@ exit:                                     ; preds = %for.body
 }
 
 
-define i32 @select_i32_from_icmp(i32* nocapture readonly %v, i32 %a, i32 %b, i64 %n) {
+define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) {
 ; CHECK-LABEL: @select_i32_from_icmp
 ; CHECK-VF4IC1:      vector.ph:
 ; CHECK-VF4IC1:        [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i32 0
@@ -138,8 +138,8 @@ entry:
 for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
   %1 = phi i32 [ %a, %entry ], [ %5, %for.body ]
-  %2 = getelementptr inbounds i32, i32* %v, i64 %0
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %v, i64 %0
+  %3 = load i32, ptr %2, align 4
   %4 = icmp eq i32 %3, 3
   %5 = select i1 %4, i32 %1, i32 %b
   %6 = add nuw nsw i64 %0, 1
@@ -151,7 +151,7 @@ exit:                                     ; preds = %for.body
 }
 
 
-define i32 @select_const_i32_from_fcmp_fast(float* nocapture readonly %v, i64 %n) {
+define i32 @select_const_i32_from_fcmp_fast(ptr nocapture readonly %v, i64 %n) {
 ; CHECK-LABEL: @select_const_i32_from_fcmp_fast
 ; CHECK-VF4IC1:      vector.body:
 ; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2, i32 2, i32 2, i32 2>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
@@ -168,8 +168,8 @@ entry:
 for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
   %1 = phi i32 [ 2, %entry ], [ %5, %for.body ]
-  %2 = getelementptr inbounds float, float* %v, i64 %0
-  %3 = load float, float* %2, align 4
+  %2 = getelementptr inbounds float, ptr %v, i64 %0
+  %3 = load float, ptr %2, align 4
   %4 = fcmp fast ueq float %3, 3.0
   %5 = select i1 %4, i32 %1, i32 1
   %6 = add nuw nsw i64 %0, 1
@@ -181,7 +181,7 @@ exit:                                     ; preds = %for.body
 }
 
 
-define i32 @select_const_i32_from_fcmp(float* nocapture readonly %v, i64 %n) {
+define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) {
 ; CHECK-LABEL: @select_const_i32_from_fcmp
 ; CHECK-VF4IC1:      vector.body:
 ; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2, i32 2, i32 2, i32 2>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
@@ -198,8 +198,8 @@ entry:
 for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
   %1 = phi i32 [ 2, %entry ], [ %5, %for.body ]
-  %2 = getelementptr inbounds float, float* %v, i64 %0
-  %3 = load float, float* %2, align 4
+  %2 = getelementptr inbounds float, ptr %v, i64 %0
+  %3 = load float, ptr %2, align 4
   %4 = fcmp ueq float %3, 3.0
   %5 = select i1 %4, i32 %1, i32 1
   %6 = add nuw nsw i64 %0, 1
@@ -248,7 +248,7 @@ exit:                                     ; preds = %for.body
 ; Negative tests
 
 ; We don't support FP reduction variables at the moment.
-define float @select_const_f32_from_icmp(i32* nocapture readonly %v, i64 %n) {
+define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) {
 ; CHECK: @select_const_f32_from_icmp
 ; CHECK-NOT: vector.body
 entry:
@@ -257,8 +257,8 @@ entry:
 for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
   %1 = phi fast float [ 3.0, %entry ], [ %5, %for.body ]
-  %2 = getelementptr inbounds i32, i32* %v, i64 %0
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds i32, ptr %v, i64 %0
+  %3 = load i32, ptr %2, align 4
   %4 = icmp eq i32 %3, 3
   %5 = select fast i1 %4, float %1, float 7.0
   %6 = add nuw nsw i64 %0, 1
@@ -272,7 +272,7 @@ exit:                                     ; preds = %for.body
 
 ; We don't support select/cmp reduction patterns where there is more than one
 ; use of the icmp/fcmp.
-define i32 @select_const_i32_from_icmp_mul_use(i32* nocapture readonly %v1, i32* %v2, i64 %n) {
+define i32 @select_const_i32_from_icmp_mul_use(ptr nocapture readonly %v1, ptr %v2, i64 %n) {
 ; CHECK-LABEL: @select_const_i32_from_icmp_mul_use
 ; CHECK-NOT: vector.body
 entry:
@@ -282,8 +282,8 @@ for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %8, %for.body ]
   %1 = phi i32 [ 3, %entry ], [ %6, %for.body ]
   %2 = phi i32 [ 0, %entry ], [ %7, %for.body ]
-  %3 = getelementptr inbounds i32, i32* %v1, i64 %0
-  %4 = load i32, i32* %3, align 4
+  %3 = getelementptr inbounds i32, ptr %v1, i64 %0
+  %4 = load i32, ptr %3, align 4
   %5 = icmp eq i32 %4, 3
   %6 = select i1 %5, i32 %1, i32 7
   %7 = zext i1 %5 to i32
@@ -292,13 +292,13 @@ for.body:                                      ; preds = %entry, %for.body
   br i1 %9, label %exit, label %for.body
 
 exit:                                     ; preds = %for.body
-  store i32 %7, i32* %v2, align 4
+  store i32 %7, ptr %v2, align 4
   ret i32 %6
 }
 
 
 ; We don't support selecting loop-variant values.
-define i32 @select_variant_i32_from_icmp(i32* nocapture readonly %v1, i32* nocapture readonly %v2, i64 %n) {
+define i32 @select_variant_i32_from_icmp(ptr nocapture readonly %v1, ptr nocapture readonly %v2, i64 %n) {
 ; CHECK-LABEL: @select_variant_i32_from_icmp
 ; CHECK-NOT: vector.body
 entry:
@@ -307,10 +307,10 @@ entry:
 for.body:                                      ; preds = %entry, %for.body
   %0 = phi i64 [ 0, %entry ], [ %8, %for.body ]
   %1 = phi i32 [ 3, %entry ], [ %7, %for.body ]
-  %2 = getelementptr inbounds i32, i32* %v1, i64 %0
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds i32, i32* %v2, i64 %0
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds i32, ptr %v1, i64 %0
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds i32, ptr %v2, i64 %0
+  %5 = load i32, ptr %4, align 4
   %6 = icmp eq i32 %3, 3
   %7 = select i1 %6, i32 %1, i32 %5
   %8 = add nuw nsw i64 %0, 1

diff --git a/llvm/test/Transforms/LoopVectorize/simple-unroll.ll b/llvm/test/Transforms/LoopVectorize/simple-unroll.ll
index 16344183ff44b..3199b69330655 100644
--- a/llvm/test/Transforms/LoopVectorize/simple-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/simple-unroll.ll
@@ -22,11 +22,11 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp {
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
   %4 = trunc i64 %indvars.iv to i32
   %5 = add nsw i32 %3, %4
-  store i32 %5, i32* %2, align 4
+  store i32 %5, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff --git a/llvm/test/Transforms/LoopVectorize/skip-iterations.ll b/llvm/test/Transforms/LoopVectorize/skip-iterations.ll
index afea083efc3f5..f3fdb9066fee6 100644
--- a/llvm/test/Transforms/LoopVectorize/skip-iterations.ll
+++ b/llvm/test/Transforms/LoopVectorize/skip-iterations.ll
@@ -22,7 +22,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; }
 ; CHECK-LABEL: test1(
 ; CHECK-NOT: <4 x i32>
-define i32 @test1(i32* nocapture %A, i32 %Length) {
+define i32 @test1(ptr nocapture %A, i32 %Length) {
 entry:
   %cmp8 = icmp sgt i32 %Length, 0
   br i1 %cmp8, label %for.body.preheader, label %end
@@ -32,13 +32,13 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %if.else
   %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4, !tbaa !15
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4, !tbaa !15
   %cmp1 = icmp sgt i32 %0, 10
   br i1 %cmp1, label %end.loopexit, label %if.else
 
 if.else:                                          ; preds = %for.body
-  store i32 0, i32* %arrayidx, align 4, !tbaa !15
+  store i32 0, ptr %arrayidx, align 4, !tbaa !15
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %1 = trunc i64 %indvars.iv.next to i32
   %cmp = icmp slt i32 %1, %Length
@@ -65,7 +65,7 @@ end:                                              ; preds = %end.loopexit, %entr
 
 ; CHECK-LABEL: test2(
 ; CHECK-NOT: <4 x i32>
-define i32 @test2(i32* nocapture %A, i32 %Length, i32 %K) {
+define i32 @test2(ptr nocapture %A, i32 %Length, i32 %K) {
 entry:
   %cmp8 = icmp sgt i32 %Length, 0
   br i1 %cmp8, label %for.body.preheader, label %end
@@ -75,8 +75,8 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %if.else
   %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %ld = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %ld = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp eq i32 %ld, %K
   br i1 %cmp1, label %end.loopexit, label %if.else
 
@@ -107,7 +107,7 @@ end:                                              ; preds = %end.loopexit, %entr
 ; we handle the speculation safety of the widened load).
 ; CHECK-LABEL: test3(
 ; CHECK-NOT: <4 x i32>
-define i32 @test3(i32* nocapture %A, i32 %Length, i32 %K) {
+define i32 @test3(ptr nocapture %A, i32 %Length, i32 %K) {
 entry:
   %cmp8 = icmp sgt i32 %Length, 0
   br i1 %cmp8, label %for.body.preheader, label %end
@@ -117,8 +117,8 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %if.else
   %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %ld = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %ld = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp eq i32 %ld, %K
   br i1 %cmp1, label %end.loopexit, label %if.else
 
@@ -147,7 +147,7 @@ end:                                              ; preds = %end.loopexit, %entr
 ; For this test, we vectorize and generate predicated stores to A[i].
 ; CHECK-LABEL: test4(
 ; CHECK: <4 x i32>
-define void @test4(i32* nocapture %A, i32 %Length, i32 %K, i32 %J) {
+define void @test4(ptr nocapture %A, i32 %Length, i32 %K, i32 %J) {
 entry:
   %cmp8 = icmp sgt i32 %Length, 0
   br i1 %cmp8, label %for.body.preheader, label %end.loopexit
@@ -157,15 +157,15 @@ for.body.preheader:                               ; preds = %entry
 
 for.body:                                         ; preds = %for.body.preheader, %if.else
   %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %trunc = trunc i64 %indvars.iv.next to i32
-  %ld = load i32, i32* %arrayidx, align 4
+  %ld = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp eq i32 %ld, %K
   br i1 %cmp1, label %latch, label %if.else
 
 if.else:
-  store i32 %J, i32* %arrayidx, align 4
+  store i32 %J, ptr %arrayidx, align 4
   br label %latch
 
 latch:                                          ; preds = %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/small-loop.ll b/llvm/test/Transforms/LoopVectorize/small-loop.ll
index 8f9685ad1ed94..5705ee3a4c0cf 100644
--- a/llvm/test/Transforms/LoopVectorize/small-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/small-loop.ll
@@ -14,13 +14,13 @@ define void @example1() nounwind uwtable ssp {
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %5, %3
-  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %6, i32* %7, align 4
+  %7 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %6, ptr %7, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 8  ;   <-----  A really small trip count
@@ -38,13 +38,13 @@ define void @bound1(i32 %k) nounwind uwtable ssp {
 
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
-  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %indvars.iv
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %5, %3
-  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %6, i32* %7, align 4
+  %7 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %6, ptr %7, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %large = icmp sge i32 %lftr.wideiv, 8

diff --git a/llvm/test/Transforms/LoopVectorize/start-non-zero.ll b/llvm/test/Transforms/LoopVectorize/start-non-zero.ll
index 93cc8deccbf36..35d4ff09b60eb 100644
--- a/llvm/test/Transforms/LoopVectorize/start-non-zero.ll
+++ b/llvm/test/Transforms/LoopVectorize/start-non-zero.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK-LABEL: @start_at_nonzero(
 ;CHECK: mul nuw <4 x i32>
 ;CHECK: ret i32
-define i32 @start_at_nonzero(i32* nocapture %a, i32 %start, i32 %end) nounwind uwtable ssp {
+define i32 @start_at_nonzero(ptr nocapture %a, i32 %start, i32 %end) nounwind uwtable ssp {
 entry:
   %cmp3 = icmp slt i32 %start, %end
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
@@ -16,10 +16,10 @@ for.body.lr.ph:                                   ; preds = %entry
 
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx, align 4
   %mul = mul nuw i32 %1, 333
-  store i32 %mul, i32* %arrayidx, align 4
+  store i32 %mul, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %2 = trunc i64 %indvars.iv.next to i32
   %cmp = icmp slt i32 %2, %end

diff --git a/llvm/test/Transforms/LoopVectorize/store-shuffle-bug.ll b/llvm/test/Transforms/LoopVectorize/store-shuffle-bug.ll
index 68ad00e67a0fa..bb9e8a5af8cf1 100644
--- a/llvm/test/Transforms/LoopVectorize/store-shuffle-bug.ll
+++ b/llvm/test/Transforms/LoopVectorize/store-shuffle-bug.ll
@@ -29,16 +29,16 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ 93, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = add i64 %indvars.iv, 1
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @uf, i64 0, i64 %0
-  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* @xi, i64 0, i64 %0
-  %1 = load i32, i32* %arrayidx3, align 4
-  %2 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr @uf, i64 0, i64 %0
+  %arrayidx3 = getelementptr inbounds [100 x i32], ptr @xi, i64 0, i64 %0
+  %1 = load i32, ptr %arrayidx3, align 4
+  %2 = load i32, ptr %arrayidx, align 4
   %add4 = add nsw i32 %2, %1
-  store i32 %add4, i32* %arrayidx, align 4
-  %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* @q, i64 0, i64 %0
-  %3 = load i32, i32* %arrayidx7, align 4
+  store i32 %add4, ptr %arrayidx, align 4
+  %arrayidx7 = getelementptr inbounds [100 x i32], ptr @q, i64 0, i64 %0
+  %3 = load i32, ptr %arrayidx7, align 4
   %add8 = add nsw i32 %add4, %3
-  store i32 %add8, i32* %arrayidx, align 4
+  store i32 %add8, ptr %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, -1
   %4 = trunc i64 %indvars.iv.next to i32
   %cmp = icmp ugt i32 %4, 2

diff --git a/llvm/test/Transforms/LoopVectorize/struct_access.ll b/llvm/test/Transforms/LoopVectorize/struct_access.ll
index 0e2401be1df65..6365234935431 100644
--- a/llvm/test/Transforms/LoopVectorize/struct_access.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct_access.ll
@@ -23,7 +23,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK-LABEL: @foo(
 ;CHECK-NOT: load <4 x i32>
 ;CHECK: ret
-define i32 @foo(%struct.coordinate* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+define i32 @foo(ptr nocapture %A, i32 %n) nounwind uwtable readonly ssp {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
@@ -31,8 +31,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
-  %x = getelementptr inbounds %struct.coordinate, %struct.coordinate* %A, i64 %indvars.iv, i32 0
-  %0 = load i32, i32* %x, align 4
+  %x = getelementptr inbounds %struct.coordinate, ptr %A, i64 %indvars.iv, i32 0
+  %0 = load i32, ptr %x, align 4
   %add = add nsw i32 %0, %sum.05
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -65,7 +65,7 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK-LABEL: @bar(
 ;CHECK: load <4 x i32>
 ;CHECK: ret
-define i32 @bar(%struct.lit* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+define i32 @bar(ptr nocapture %A, i32 %n) nounwind uwtable readonly ssp {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.body, label %for.end
@@ -73,8 +73,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
-  %x = getelementptr inbounds %struct.lit, %struct.lit* %A, i64 %indvars.iv, i32 0
-  %0 = load i32, i32* %x, align 4
+  %x = getelementptr inbounds %struct.lit, ptr %A, i64 %indvars.iv, i32 0
+  %0 = load i32, ptr %x, align 4
   %add = add nsw i32 %0, %sum.05
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32

diff --git a/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll b/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll
index f88cedff288d4..0f03d39f76c01 100644
--- a/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll
+++ b/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll
@@ -4,14 +4,14 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; TBAA partitions the accesses in this loop, so it can be vectorized without
 ; runtime checks.
-define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) {
+define i32 @test1(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-LABEL: @test1
 ; CHECK: entry:
 ; CHECK-NEXT: br label %vector.body
 ; CHECK: vector.body:
 
-; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
-; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
+; CHECK: load <4 x float>, ptr %{{.*}}, align 4, !tbaa
+; CHECK: store <4 x i32> %{{.*}}, ptr %{{.*}}, align 4, !tbaa
 
 ; CHECK: ret i32 0
 
@@ -21,8 +21,8 @@ define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) {
 ; CHECK-NOTBAA-NOT: icmp
 ; CHECK-NOTBAA: br i1 {{.+}}, label %for.body, label %vector.body
 
-; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
-; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: load <4 x float>, ptr %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: store <4 x i32> %{{.*}}, ptr %{{.*}}, align 4, !tbaa
 
 ; CHECK-NOTBAA: ret i32 0
 
@@ -31,11 +31,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4, !tbaa !0
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4, !tbaa !0
   %conv = fptosi float %0 to i32
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %conv, ptr %arrayidx2, align 4, !tbaa !4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1600
   br i1 %exitcond, label %for.end, label %for.body
@@ -46,15 +46,15 @@ for.end:                                          ; preds = %for.body
 
 ; This test is like the first, except here there is still one runtime check
 ; required. Without TBAA, however, two checks are required.
-define i32 @test2(i32* nocapture readonly %a, float* nocapture readonly %b, float* nocapture %c) {
+define i32 @test2(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c) {
 ; CHECK-LABEL: @test2
 ; CHECK: entry:
 ; CHECK: icmp ult i64
 ; CHECK-NOT: icmp
 ; CHECK: br i1 {{.+}}, label %for.body, label %vector.body
 
-; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
-; CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK: load <4 x float>, ptr %{{.*}}, align 4, !tbaa
+; CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 4, !tbaa
 
 ; CHECK: ret i32 0
 
@@ -65,8 +65,8 @@ define i32 @test2(i32* nocapture readonly %a, float* nocapture readonly %b, floa
 ; CHECK-NOTBAA-NOT: icmp
 ; CHECK-NOTBAA: br i1 {{.+}}, label %for.body, label %vector.body
 
-; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
-; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: load <4 x float>, ptr %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: store <4 x float> %{{.*}}, ptr %{{.*}}, align 4, !tbaa
 
 ; CHECK-NOTBAA: ret i32 0
 
@@ -75,14 +75,14 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4, !tbaa !0
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4, !tbaa !4
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4, !tbaa !4
   %conv = sitofp i32 %1 to float
   %mul = fmul float %0, %conv
-  %arrayidx4 = getelementptr inbounds float, float* %c, i64 %indvars.iv
-  store float %mul, float* %arrayidx4, align 4, !tbaa !0
+  %arrayidx4 = getelementptr inbounds float, ptr %c, i64 %indvars.iv
+  store float %mul, ptr %arrayidx4, align 4, !tbaa !0
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1600
   br i1 %exitcond, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/tripcount.ll b/llvm/test/Transforms/LoopVectorize/tripcount.ll
index 6c57bec82fd4b..ab88c705dd998 100644
--- a/llvm/test/Transforms/LoopVectorize/tripcount.ll
+++ b/llvm/test/Transforms/LoopVectorize/tripcount.ll
@@ -18,11 +18,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %i.08, %bound
   br i1 %exitcond, label %for.end, label %for.body, !prof !1
@@ -43,11 +43,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %i.08, %bound
   br i1 %exitcond, label %for.end, label %for.body, !prof !1
@@ -73,11 +73,11 @@ for.preheader:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %i.08, %bound
   br i1 %exitcond, label %for.end, label %for.body, !prof !3
@@ -98,11 +98,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp sgt i32 %i.08, %bound
   br i1 %exitcond, label %for.end, label %for.body, !prof !1
@@ -122,11 +122,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp slt i32 %i.08, 2
   br i1 %exitcond, label %for.body, label %for.end
@@ -146,11 +146,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp slt i32 %i.08, 1000
   br i1 %exitcond, label %for.body, label %for.end
@@ -170,11 +170,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 5
   %exitcond = icmp slt i32 %i.08, 10
   br i1 %exitcond, label %for.body, label %for.end
@@ -194,11 +194,11 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
   %cmp1 = icmp eq i8 %0, 0
   %. = select i1 %cmp1, i8 2, i8 1
-  store i8 %., i8* %arrayidx, align 1
+  store i8 %., ptr %arrayidx, align 1
   %inc = add nsw i32 %i.08, 1
   %exitcond = icmp slt i32 %i.08, 1000
   br i1 %exitcond, label %for.body, label %for.end, !prof !1

diff --git a/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll b/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll
index 5624946861cb7..2278821c5b2f9 100644
--- a/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=8 -S < %s | FileCheck %s
 
-define i8 @reduction_and_trunc(i8* noalias nocapture %ptr) {
+define i8 @reduction_and_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-LABEL: @reduction_and_trunc(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -11,9 +11,8 @@ define i8 @reduction_and_trunc(i8* noalias nocapture %ptr) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i8> [ <i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
 ; CHECK-NEXT:    [[TMP3]] = and <8 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
@@ -36,8 +35,8 @@ for.body:
   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
   %sum.02p = phi i32 [ %and, %for.body ], [ 0, %entry ]
   %sum.02 = and i32 %sum.02p, 255
-  %gep = getelementptr inbounds i8, i8* %ptr, i32 %iv
-  %load = load i8, i8* %gep
+  %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
+  %load = load i8, ptr %gep
   %ext = zext i8 %load to i32
   %and = and i32 %sum.02, %ext
   %iv.next = add i32 %iv, 1
@@ -49,7 +48,7 @@ for.end:
   ret i8 %ret
 }
 
-define i16 @reduction_or_trunc(i16* noalias nocapture %ptr) {
+define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-LABEL: @reduction_or_trunc(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -59,9 +58,8 @@ define i16 @reduction_or_trunc(i16* noalias nocapture %ptr) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP2]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
 ; CHECK-NEXT:    [[TMP3]] = or <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
@@ -84,8 +82,8 @@ for.body:
   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
   %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ]
   %sum.02 = and i32 %sum.02p, 65535
-  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
-  %load = load i16, i16* %gep
+  %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
+  %load = load i16, ptr %gep
   %ext = zext i16 %load to i32
   %xor = or i32 %sum.02, %ext
   %iv.next = add i32 %iv, 1
@@ -97,7 +95,7 @@ for.end:
   ret i16 %ret
 }
 
-define i16 @reduction_xor_trunc(i16* noalias nocapture %ptr) {
+define i16 @reduction_xor_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-LABEL: @reduction_xor_trunc(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -107,9 +105,8 @@ define i16 @reduction_xor_trunc(i16* noalias nocapture %ptr) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP2]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
 ; CHECK-NEXT:    [[TMP3]] = xor <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
@@ -132,8 +129,8 @@ for.body:
   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
   %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ]
   %sum.02 = and i32 %sum.02p, 65535
-  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
-  %load = load i16, i16* %gep
+  %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
+  %load = load i16, ptr %gep
   %ext = zext i16 %load to i32
   %xor = xor i32 %sum.02, %ext
   %iv.next = add i32 %iv, 1
@@ -145,7 +142,7 @@ for.end:
   ret i16 %ret
 }
 
-define i8 @reduction_smin_trunc(i8* noalias nocapture %ptr) {
+define i8 @reduction_smin_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-LABEL: @reduction_smin_trunc(
 ; CHECK-NOT: vector.body
 ; CHECK-NOT: <8 x
@@ -157,8 +154,8 @@ for.body:
   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
   %sum.02p = phi i32 [ %min, %for.body ], [ 256, %entry ]
   %sum.02 = and i32 %sum.02p, 255
-  %gep = getelementptr inbounds i8, i8* %ptr, i32 %iv
-  %load = load i8, i8* %gep
+  %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
+  %load = load i8, ptr %gep
   %ext = sext i8 %load to i32
   %icmp = icmp slt i32 %sum.02, %ext
   %min = select i1 %icmp, i32 %sum.02, i32 %ext
@@ -171,7 +168,7 @@ for.end:
   ret i8 %ret
 }
 
-define i8 @reduction_umin_trunc(i8* noalias nocapture %ptr) {
+define i8 @reduction_umin_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-LABEL: @reduction_umin_trunc(
 ; CHECK-NOT: vector.body
 ; CHECK-NOT: <8 x
@@ -183,8 +180,8 @@ for.body:
   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
   %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ]
   %sum.02 = and i32 %sum.02p, 255
-  %gep = getelementptr inbounds i8, i8* %ptr, i32 %iv
-  %load = load i8, i8* %gep
+  %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
+  %load = load i8, ptr %gep
   %ext = zext i8 %load to i32
   %icmp = icmp ult i32 %sum.02, %ext
   %min = select i1 %icmp, i32 %sum.02, i32 %ext
@@ -197,7 +194,7 @@ for.end:
   ret i8 %ret
 }
 
-define i16 @reduction_smax_trunc(i16* noalias nocapture %ptr) {
+define i16 @reduction_smax_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-LABEL: @reduction_smax_trunc(
 ; CHECK-NOT: vector.body
 ; CHECK-NOT: <8 x
@@ -209,8 +206,8 @@ for.body:
   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
   %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ]
   %sum.02 = and i32 %sum.02p, 65535
-  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
-  %load = load i16, i16* %gep
+  %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
+  %load = load i16, ptr %gep
   %ext = sext i16 %load to i32
   %icmp = icmp sgt i32 %sum.02, %ext
   %min = select i1 %icmp, i32 %sum.02, i32 %ext
@@ -223,7 +220,7 @@ for.end:
   ret i16 %ret
 }
 
-define i16 @reduction_umax_trunc(i16* noalias nocapture %ptr) {
+define i16 @reduction_umax_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-LABEL: @reduction_umax_trunc(
 ; CHECK-NOT: vector.body
 ; CHECK-NOT: <8 x
@@ -235,8 +232,8 @@ for.body:
   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
   %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ]
   %sum.02 = and i32 %sum.02p, 65535
-  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
-  %load = load i16, i16* %gep
+  %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
+  %load = load i16, ptr %gep
   %ext = zext i16 %load to i32
   %icmp = icmp ugt i32 %sum.02, %ext
   %min = select i1 %icmp, i32 %sum.02, i32 %ext

diff --git a/llvm/test/Transforms/LoopVectorize/undef-inst-bug.ll b/llvm/test/Transforms/LoopVectorize/undef-inst-bug.ll
index e8a522996bcfa..1fccf546f4a67 100644
--- a/llvm/test/Transforms/LoopVectorize/undef-inst-bug.ll
+++ b/llvm/test/Transforms/LoopVectorize/undef-inst-bug.ll
@@ -24,8 +24,8 @@ for.body:
   ; Loop invariant anchored in loop.
   %idxprom21 = zext i32 undef to i64
 
-  %arrayidx23 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* undef, i64 0, i64 %idxprom21, i64 %indvars.iv17
-  store i32 undef, i32* %arrayidx23, align 4
+  %arrayidx23 = getelementptr inbounds [100 x [100 x i32]], ptr undef, i64 0, i64 %idxprom21, i64 %indvars.iv17
+  store i32 undef, ptr %arrayidx23, align 4
   %indvars.next= add i64 %indvars.iv17, -1
   %0 = trunc i64 %indvars.next to i32
   %cmp15 = icmp ugt i32 %0, undef

diff --git a/llvm/test/Transforms/LoopVectorize/unroll-novec-memcheck-metadata.ll b/llvm/test/Transforms/LoopVectorize/unroll-novec-memcheck-metadata.ll
index fd5255c46dae7..faadce6b29059 100644
--- a/llvm/test/Transforms/LoopVectorize/unroll-novec-memcheck-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/unroll-novec-memcheck-metadata.ll
@@ -14,18 +14,18 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK-DAG: ![[MD3]] = distinct !{![[MD3]], !"LVerDomain"}
 
 ; Function Attrs: norecurse nounwind uwtable
-define void @test(i32* nocapture readonly %a, i32* nocapture %b) local_unnamed_addr #0 {
+define void @test(ptr nocapture readonly %a, ptr nocapture %b) local_unnamed_addr #0 {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %l.1 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %l.2 = load i32, i32* %arrayidx2
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %l.1 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %l.2 = load i32, ptr %arrayidx2
   %add = add nsw i32 %l.1, %l.2
-  store i32 %add, i32* %arrayidx2, align 4
+  store i32 %add, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 10000
   br i1 %exitcond, label %for.end, label %for.body

diff --git a/llvm/test/Transforms/LoopVectorize/unroll.ll b/llvm/test/Transforms/LoopVectorize/unroll.ll
index a636e946a0fad..94e7549149e61 100644
--- a/llvm/test/Transforms/LoopVectorize/unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/unroll.ll
@@ -12,7 +12,7 @@
 
 define void @foo() #0 {
 entry:
-  %0 = load i32, i32* @N, align 4
+  %0 = load i32, ptr @N, align 4
   %cmp5 = icmp sgt i32 %0, 0
   br i1 %cmp5, label %for.body.lr.ph, label %for.end
 
@@ -23,8 +23,8 @@ for.body.lr.ph:                                   ; preds = %entry
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.06 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %mul = mul nuw nsw i64 %i.06, 7
-  %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @a, i64 0, i64 %mul
-  store i32 3, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [1000 x i32], ptr @a, i64 0, i64 %mul
+  store i32 3, ptr %arrayidx, align 4
   %inc = add nuw nsw i64 %i.06, 1
   %cmp = icmp slt i64 %inc, %conv
   br i1 %cmp, label %for.body, label %for.end.loopexit

diff --git a/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll b/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll
index d6a24e614ea07..ba02ddd1e5e23 100644
--- a/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll
+++ b/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll
@@ -8,7 +8,7 @@
 ; loop, we must exit to the epilogue on iteration with %indvars.iv = 1022 to
 ; avoid an out of bounds access.
 
-define void @test(double* %data) {
+define void @test(ptr %data) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -22,14 +22,14 @@ define void @test(double* %data) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[INDUCTION1]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[TMP0]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, double* [[DATA:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[DATA]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load double, double* [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = load double, double* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load double, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load double, ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = fneg double [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = fneg double [[TMP7]]
-; CHECK-NEXT:    store double [[TMP8]], double* [[TMP4]], align 8
-; CHECK-NEXT:    store double [[TMP9]], double* [[TMP5]], align 8
+; CHECK-NEXT:    store double [[TMP8]], ptr [[TMP4]], align 8
+; CHECK-NEXT:    store double [[TMP9]], ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1022
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -46,10 +46,10 @@ define void @test(double* %data) {
 ; CHECK:       for.latch:
 ; CHECK-NEXT:    [[T15:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[T16:%.*]] = or i64 [[T15]], 1
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[DATA]], i64 [[T16]]
-; CHECK-NEXT:    [[T17:%.*]] = load double, double* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[T16]]
+; CHECK-NEXT:    [[T17:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[T17]]
-; CHECK-NEXT:    store double [[FNEG]], double* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    store double [[FNEG]], ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    br label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
@@ -66,10 +66,10 @@ for.body:
 for.latch:
   %t15 = shl nuw nsw i64 %indvars.iv, 1
   %t16 = or i64 %t15, 1
-  %arrayidx = getelementptr inbounds double, double* %data, i64 %t16
-  %t17 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %data, i64 %t16
+  %t17 = load double, ptr %arrayidx, align 8
   %fneg = fneg double %t17
-  store double %fneg, double* %arrayidx, align 8
+  store double %fneg, ptr %arrayidx, align 8
   br label %for.body
 
 for.end:

diff  --git a/llvm/test/Transforms/LoopVectorize/unroll_novec.ll b/llvm/test/Transforms/LoopVectorize/unroll_novec.ll
index a95bee34309b2..4858e3698ba55 100644
--- a/llvm/test/Transforms/LoopVectorize/unroll_novec.ll
+++ b/llvm/test/Transforms/LoopVectorize/unroll_novec.ll
@@ -9,11 +9,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;    a[i] += i;
 ;  }
 ;CHECK-LABEL: @inc(
-;CHECK: load i32, i32*
-;CHECK: load i32, i32*
-;CHECK: load i32, i32*
-;CHECK: load i32, i32*
-;CHECK-NOT: load i32, i32*
+;CHECK: load i32, ptr
+;CHECK: load i32, ptr
+;CHECK: load i32, ptr
+;CHECK: load i32, ptr
+;CHECK-NOT: load i32, ptr
 ;CHECK: add nsw i32
 ;CHECK: add nsw i32
 ;CHECK: add nsw i32
@@ -32,11 +32,11 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp {
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  %3 = load i32, i32* %2, align 4
+  %2 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %3 = load i32, ptr %2, align 4
   %4 = trunc i64 %indvars.iv to i32
   %5 = add nsw i32 %3, %4
-  store i32 %5, i32* %2, align 4
+  store i32 %5, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

diff  --git a/llvm/test/Transforms/LoopVectorize/unsafe-dep-remark.ll b/llvm/test/Transforms/LoopVectorize/unsafe-dep-remark.ll
index 096632272a31c..9394270f3a754 100644
--- a/llvm/test/Transforms/LoopVectorize/unsafe-dep-remark.ll
+++ b/llvm/test/Transforms/LoopVectorize/unsafe-dep-remark.ll
@@ -7,34 +7,34 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ;     1	void success (char *A, char *B, char *C, char *D, char *E, int N) {
 ;     2	  for(int i = 0; i < N; i++) {
 ;     3	    A[i + 1] = A[i] + B[i];
 ;     4	    C[i] = D[i] * E[i];
 ;     5	  }
 ;     6	}
 
 ; CHECK: remark: /tmp/kk.c:3:14: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
 
-define void @success(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) !dbg !6 {
+define void @success(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture %C, ptr nocapture readonly %D, ptr nocapture readonly %E, i32 %N) !dbg !6 {
 entry:
   %cmp28 = icmp sgt i32 %N, 0, !dbg !8
   br i1 %cmp28, label %for.body, label %for.cond.cleanup, !dbg !9
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !11
-  %0 = load i8, i8* %arrayidx, align 1, !dbg !11, !tbaa !12
-  %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !15
-  %1 = load i8, i8* %arrayidx2, align 1, !dbg !15, !tbaa !12
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %indvars.iv, !dbg !11
+  %0 = load i8, ptr %arrayidx, align 1, !dbg !11, !tbaa !12
+  %arrayidx2 = getelementptr inbounds i8, ptr %B, i64 %indvars.iv, !dbg !15
+  %1 = load i8, ptr %arrayidx2, align 1, !dbg !15, !tbaa !12
   %add = add i8 %1, %0, !dbg !16
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !9
-  %arrayidx7 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv.next, !dbg !17
-  store i8 %add, i8* %arrayidx7, align 1, !dbg !18, !tbaa !12
-  %arrayidx9 = getelementptr inbounds i8, i8* %D, i64 %indvars.iv, !dbg !19
-  %2 = load i8, i8* %arrayidx9, align 1, !dbg !19, !tbaa !12
-  %arrayidx12 = getelementptr inbounds i8, i8* %E, i64 %indvars.iv, !dbg !20
-  %3 = load i8, i8* %arrayidx12, align 1, !dbg !20, !tbaa !12
+  %arrayidx7 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv.next, !dbg !17
+  store i8 %add, ptr %arrayidx7, align 1, !dbg !18, !tbaa !12
+  %arrayidx9 = getelementptr inbounds i8, ptr %D, i64 %indvars.iv, !dbg !19
+  %2 = load i8, ptr %arrayidx9, align 1, !dbg !19, !tbaa !12
+  %arrayidx12 = getelementptr inbounds i8, ptr %E, i64 %indvars.iv, !dbg !20
+  %3 = load i8, ptr %arrayidx12, align 1, !dbg !20, !tbaa !12
   %mul = mul i8 %3, %2, !dbg !21
-  %arrayidx16 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !22
-  store i8 %mul, i8* %arrayidx16, align 1, !dbg !23, !tbaa !12
+  %arrayidx16 = getelementptr inbounds i8, ptr %C, i64 %indvars.iv, !dbg !22
+  store i8 %mul, ptr %arrayidx16, align 1, !dbg !23, !tbaa !12
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !9
   %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !9
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !9

diff  --git a/llvm/test/Transforms/LoopVectorize/unsafe-vf-hint-remark.ll b/llvm/test/Transforms/LoopVectorize/unsafe-vf-hint-remark.ll
index c7488b67c34fa..0c9fa742765d3 100644
--- a/llvm/test/Transforms/LoopVectorize/unsafe-vf-hint-remark.ll
+++ b/llvm/test/Transforms/LoopVectorize/unsafe-vf-hint-remark.ll
@@ -16,7 +16,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK: remark: <unknown>:0:0: User-specified vectorization factor 4 is unsafe, clamping to maximum safe vectorization factor 2
 ; CHECK-LABEL: @foo
 ; CHECK: <2 x i32>
-define void @foo(i32* %a, i32* %b) {
+define void @foo(ptr %a, ptr %b) {
 entry:
   br label %loop.ph
 
@@ -25,14 +25,14 @@ loop.ph:
 
 loop:
   %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %2 = add nuw nsw i64 %iv, 2
-  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %add, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %add, ptr %arrayidx5, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, 1024
   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0

diff  --git a/llvm/test/Transforms/LoopVectorize/value-ptr-bug.ll b/llvm/test/Transforms/LoopVectorize/value-ptr-bug.ll
index 7fac2b9b984f9..6579e0e33b308 100644
--- a/llvm/test/Transforms/LoopVectorize/value-ptr-bug.ll
+++ b/llvm/test/Transforms/LoopVectorize/value-ptr-bug.ll
@@ -12,35 +12,33 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK-LABEL: @test_vh(
 ; CHECK-NOT: store <4 x i8> undef
 
-define void @test_vh(i32* %ptr265, i32* %ptr266, i32 %sub267) {
+define void @test_vh(ptr %ptr265, ptr %ptr266, i32 %sub267) {
 entry:
   br label %loop
 
 loop:
   %inc = phi i32 [ %sub267, %entry ], [ %add, %loop]
   %ext.inc = sext i32 %inc to i64
-  %add.ptr265 = getelementptr inbounds i32, i32* %ptr265, i64 %ext.inc
-  %add.ptr266 = getelementptr inbounds i32, i32* %ptr266, i64 %ext.inc
+  %add.ptr265 = getelementptr inbounds i32, ptr %ptr265, i64 %ext.inc
+  %add.ptr266 = getelementptr inbounds i32, ptr %ptr266, i64 %ext.inc
   %add = add i32 %inc, 9
   %cmp = icmp slt i32 %add, 140
   br i1 %cmp, label %block1, label %loop
 
 block1:
   %sub267.lcssa = phi i32 [ %add, %loop ]
-  %add.ptr266.lcssa = phi i32* [ %add.ptr266, %loop ]
-  %add.ptr265.lcssa = phi i32* [ %add.ptr265, %loop ]
-  %tmp29 = bitcast i32* %add.ptr265.lcssa to i8*
-  %tmp30 = bitcast i32* %add.ptr266.lcssa to i8*
+  %add.ptr266.lcssa = phi ptr [ %add.ptr266, %loop ]
+  %add.ptr265.lcssa = phi ptr [ %add.ptr265, %loop ]
   br label %do.body272
 
 do.body272:
   %row_width.5 = phi i32 [ %sub267.lcssa, %block1 ], [ %dec, %do.body272 ]
-  %sp.4 = phi i8* [ %tmp30, %block1 ], [ %incdec.ptr273, %do.body272 ]
-  %dp.addr.4 = phi i8* [ %tmp29, %block1 ], [ %incdec.ptr274, %do.body272 ]
-  %incdec.ptr273 = getelementptr inbounds i8, i8* %sp.4, i64 1
-  %tmp31 = load i8, i8* %sp.4, align 1
-  %incdec.ptr274 = getelementptr inbounds i8, i8* %dp.addr.4, i64 1
-  store i8 %tmp31, i8* %dp.addr.4, align 1
+  %sp.4 = phi ptr [ %add.ptr266.lcssa, %block1 ], [ %incdec.ptr273, %do.body272 ]
+  %dp.addr.4 = phi ptr [ %add.ptr265.lcssa, %block1 ], [ %incdec.ptr274, %do.body272 ]
+  %incdec.ptr273 = getelementptr inbounds i8, ptr %sp.4, i64 1
+  %tmp31 = load i8, ptr %sp.4, align 1
+  %incdec.ptr274 = getelementptr inbounds i8, ptr %dp.addr.4, i64 1
+  store i8 %tmp31, ptr %dp.addr.4, align 1
   %dec = add i32 %row_width.5, -1
   %cmp276 = icmp eq i32 %dec, 0
   br i1 %cmp276, label %loop.exit, label %do.body272

diff  --git a/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
index 5a1c3eed9d959..697df0746fc9e 100644
--- a/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
@@ -67,8 +67,8 @@ for.body:
   %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
   %sext = shl i32 %p.09, 24
   %conv = ashr exact i32 %sext, 24
-  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %conv, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %conv, ptr %arrayidx, align 4
   %add = add nsw i32 %conv, %step
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
@@ -115,7 +115,7 @@ for.end:
 ; VF8-NEXT:  [[OFFSET_IDX:%.+]] = mul i64 [[INDEX]], %step
 ; VF8-NEXT:  [[MUL0:%.+]] = mul i64 0, %step
 ; VF8-NEXT:  [[ADD:%.+]] = add i64 [[OFFSET_IDX]], [[MUL0]]
-; VF8:       getelementptr inbounds i32, i32* %in, i64 [[ADD]]
+; VF8:       getelementptr inbounds i32, ptr %in, i64 [[ADD]]
 ; VF8: middle.block:
 
 ; VF1-LABEL: @doit2
@@ -124,7 +124,7 @@ for.end:
 ; VF1: middle.block:
 ;
 
-define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step)  {
+define void @doit2(ptr nocapture readonly %in, ptr nocapture %out, i64 %size, i64 %step)  {
 entry:
   %cmp9 = icmp eq i64 %size, 0
   br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
@@ -143,10 +143,10 @@ for.body:
   %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %sext = shl i64 %w_ix.011, 32
   %idxprom = ashr exact i64 %sext, 32
-  %arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010
-  store i32 %0, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %in, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %out, i64 %offset.010
+  store i32 %0, ptr %arrayidx1, align 4
   %add = add i64 %idxprom, %step
   %inc = add nuw i64 %offset.010, 1
   %exitcond = icmp eq i64 %inc, %size
@@ -200,8 +200,8 @@ for.body:
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
   %conv = and i32 %p.09, 255
-  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
-  store i32 %conv, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 %indvars.iv
+  store i32 %conv, ptr %arrayidx, align 4
   %add = add nsw i32 %conv, %step
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
@@ -221,7 +221,7 @@ for.end:
 ; VF8: store <8 x i32> %vec.ind
 ; VF8: middle.block:
 ;
-define void @test_conv_in_latch_block(i32 %n, i32 %step, i32* noalias %A, i32* noalias %B) {
+define void @test_conv_in_latch_block(i32 %n, i32 %step, ptr noalias %A, ptr noalias %B) {
 entry:
   %wide.trip.count = zext i32 %n to i64
   br label %loop
@@ -229,21 +229,21 @@ entry:
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
   %p.09 = phi i32 [ 0, %entry ], [ %add, %latch ]
-  %B.gep = getelementptr inbounds i32, i32* %B, i64 %iv
-  %l = load i32, i32* %B.gep
+  %B.gep = getelementptr inbounds i32, ptr %B, i64 %iv
+  %l = load i32, ptr %B.gep
   %c = icmp eq i32 %l, 0
   br i1 %c, label %then, label %latch
 
 then:
-  %A.gep = getelementptr inbounds i32, i32* %A, i64 %iv
-  store i32 0, i32* %A.gep
+  %A.gep = getelementptr inbounds i32, ptr %A, i64 %iv
+  store i32 0, ptr %A.gep
   br label %latch
 
 latch:
   %sext = shl i32 %p.09, 24
   %conv = ashr exact i32 %sext, 24
   %add = add nsw i32 %conv, %step
-  store i32 %conv, i32* %B.gep, align 4
+  store i32 %conv, ptr %B.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %wide.trip.count
   br i1 %exitcond, label %exit, label %loop

diff  --git a/llvm/test/Transforms/LoopVectorize/vect.stats.ll b/llvm/test/Transforms/LoopVectorize/vect.stats.ll
index 0ca2edd009cbd..05aa13b22b6f4 100644
--- a/llvm/test/Transforms/LoopVectorize/vect.stats.ll
+++ b/llvm/test/Transforms/LoopVectorize/vect.stats.ll
@@ -10,7 +10,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-define void @vectorized(float* nocapture %a, i64 %size) {
+define void @vectorized(ptr nocapture %a, i64 %size) {
 entry:
   %cmp1 = icmp sle i64 %size, 0
   %cmp21 = icmp sgt i64 0, %size
@@ -19,10 +19,10 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv2
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv2
+  %0 = load float, ptr %arrayidx, align 4
   %mul = fmul float %0, %0
-  store float %mul, float* %arrayidx, align 4
+  store float %mul, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
   %cmp2 = icmp sgt i64 %indvars.iv.next, %size
   br i1 %cmp2, label %for.end, label %for.body
@@ -31,7 +31,7 @@ for.end:                                          ; preds = %entry, %for.body
   ret void
 }
 
-define void @not_vectorized(float* nocapture %a, i64 %size) {
+define void @not_vectorized(ptr nocapture %a, i64 %size) {
 entry:
   %cmp1 = icmp sle i64 %size, 0
   %cmp21 = icmp sgt i64 0, %size
@@ -41,14 +41,14 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %0 = add nsw i64 %indvars.iv2, -5
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %0
-  %1 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %0
+  %1 = load float, ptr %arrayidx, align 4
   %2 = add nsw i64 %indvars.iv2, 2
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %2
-  %3 = load float, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %2
+  %3 = load float, ptr %arrayidx2, align 4
   %mul = fmul float %1, %3
-  %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvars.iv2
-  store float %mul, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %indvars.iv2
+  store float %mul, ptr %arrayidx4, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
   %cmp2 = icmp sgt i64 %indvars.iv.next, %size
   br i1 %cmp2, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/vector-geps.ll b/llvm/test/Transforms/LoopVectorize/vector-geps.ll
index cde787211b259..4f4b4f6600da4 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-geps.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-geps.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 
 ;
-define void @vector_gep_stored(i32** %a, i32 *%b, i64 %n) {
+define void @vector_gep_stored(ptr %a, ptr %b, i64 %n) {
 ; CHECK-LABEL: @vector_gep_stored(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
@@ -16,10 +16,9 @@ define void @vector_gep_stored(i32** %a, i32 *%b, i64 %n) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], <4 x i64> [[VEC_IND]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32*, i32** [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32** [[TMP1]] to <4 x i32*>*
-; CHECK-NEXT:    store <4 x i32*> [[TMP0]], <4 x i32*>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], <4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <4 x ptr> [[TMP0]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -32,9 +31,9 @@ define void @vector_gep_stored(i32** %a, i32 *%b, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]]
-; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[I]]
-; CHECK-NEXT:    store i32* [[VAR0]], i32** [[VAR1]], align 8
+; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    store ptr [[VAR0]], ptr [[VAR1]], align 8
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -46,9 +45,9 @@ entry:
 
 for.body:
   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
-  %var0 = getelementptr inbounds i32, i32* %b, i64 %i
-  %var1 = getelementptr inbounds i32*, i32** %a, i64 %i
-  store i32* %var0, i32** %var1, align 8
+  %var0 = getelementptr inbounds i32, ptr %b, i64 %i
+  %var1 = getelementptr inbounds ptr, ptr %a, i64 %i
+  store ptr %var0, ptr %var1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -58,7 +57,7 @@ for.end:
 }
 
 ;
-define void @uniform_vector_gep_stored(i32** %a, i32 *%b, i64 %n) {
+define void @uniform_vector_gep_stored(ptr %a, ptr %b, i64 %n) {
 ; CHECK-LABEL: @uniform_vector_gep_stored(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
@@ -69,12 +68,11 @@ define void @uniform_vector_gep_stored(i32** %a, i32 *%b, i64 %n) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP0]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32*> [[DOTSPLATINSERT]], <4 x i32*> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32*, i32** [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32** [[TMP1]] to <4 x i32*>*
-; CHECK-NEXT:    store <4 x i32*> [[DOTSPLAT]], <4 x i32*>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 1
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <4 x ptr> [[DOTSPLAT]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -86,9 +84,9 @@ define void @uniform_vector_gep_stored(i32** %a, i32 *%b, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 1
-; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[I]]
-; CHECK-NEXT:    store i32* [[VAR0]], i32** [[VAR1]], align 8
+; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 1
+; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    store ptr [[VAR0]], ptr [[VAR1]], align 8
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -100,9 +98,9 @@ entry:
 
 for.body:
   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
-  %var0 = getelementptr inbounds i32, i32* %b, i64 1
-  %var1 = getelementptr inbounds i32*, i32** %a, i64 %i
-  store i32* %var0, i32** %var1, align 8
+  %var0 = getelementptr inbounds i32, ptr %b, i64 1
+  %var1 = getelementptr inbounds ptr, ptr %a, i64 %i
+  store ptr %var0, ptr %var1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end

diff  --git a/llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll b/llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll
index a21d4d48683d4..228cc941ecc30 100644
--- a/llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll
@@ -2,7 +2,7 @@
 
 %s1 = type { [32000 x double], [32000 x double], [32000 x double] }
 
-define i32 @load_with_pointer_phi_no_runtime_checks(%s1* %data) {
+define i32 @load_with_pointer_phi_no_runtime_checks(ptr %data) {
 ; CHECK-LABEL: @load_with_pointer_phi_no_runtime_checks
 ; CHECK-NOT: memcheck
 ; CHECK:     vector.body:
@@ -14,22 +14,22 @@ loop.header:                                        ; preds = %loop.latch, %entr
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp5 = icmp ult i64 %iv, 15999
-  %arrayidx = getelementptr inbounds %s1, %s1 * %data, i64 0, i32 0, i64 %iv
+  %arrayidx = getelementptr inbounds %s1, ptr %data, i64 0, i32 0, i64 %iv
   br i1 %cmp5, label %if.then, label %if.else
 
 if.then:                                          ; preds = %loop.header
-  %gep.1 = getelementptr inbounds %s1, %s1* %data, i64 0, i32 1, i64 %iv
+  %gep.1 = getelementptr inbounds %s1, ptr %data, i64 0, i32 1, i64 %iv
   br label %loop.latch
 
 if.else:                                          ; preds = %loop.header
-  %gep.2 = getelementptr inbounds %s1, %s1* %data, i64 0, i32 2, i64 %iv
+  %gep.2 = getelementptr inbounds %s1, ptr %data, i64 0, i32 2, i64 %iv
   br label %loop.latch
 
 loop.latch:                                          ; preds = %if.else, %if.then
-  %gep.2.sink = phi double* [ %gep.2, %if.else ], [ %gep.1, %if.then ]
-  %v8 = load double, double* %gep.2.sink, align 8
+  %gep.2.sink = phi ptr [ %gep.2, %if.else ], [ %gep.1, %if.then ]
+  %v8 = load double, ptr %gep.2.sink, align 8
   %mul16 = fmul double 3.0, %v8
-  store double %mul16, double* %arrayidx, align 8
+  store double %mul16, ptr %arrayidx, align 8
   %exitcond.not = icmp eq i64 %iv.next, 32000
   br i1 %exitcond.not, label %exit, label %loop.header
 
@@ -37,7 +37,7 @@ exit:                                             ; preds = %loop.latch
   ret i32 10
 }
 
-define i32 @store_with_pointer_phi_no_runtime_checks(%s1* %data) {
+define i32 @store_with_pointer_phi_no_runtime_checks(ptr %data) {
 ; CHECK-LABEL: @store_with_pointer_phi_no_runtime_checks
 ; CHECK-NOT: memcheck
 ; CHECK:     vector.body
@@ -49,22 +49,22 @@ loop.header:                                        ; preds = %loop.latch, %entr
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp5 = icmp ult i64 %iv, 15999
-  %arrayidx = getelementptr inbounds %s1, %s1 * %data, i64 0, i32 0, i64 %iv
+  %arrayidx = getelementptr inbounds %s1, ptr %data, i64 0, i32 0, i64 %iv
   br i1 %cmp5, label %if.then, label %if.else
 
 if.then:                                          ; preds = %loop.header
-  %gep.1 = getelementptr inbounds %s1, %s1* %data, i64 0, i32 1, i64 %iv
+  %gep.1 = getelementptr inbounds %s1, ptr %data, i64 0, i32 1, i64 %iv
   br label %loop.latch
 
 if.else:                                          ; preds = %loop.header
-  %gep.2 = getelementptr inbounds %s1, %s1* %data, i64 0, i32 2, i64 %iv
+  %gep.2 = getelementptr inbounds %s1, ptr %data, i64 0, i32 2, i64 %iv
   br label %loop.latch
 
 loop.latch:                                          ; preds = %if.else, %if.then
-  %gep.2.sink = phi double* [ %gep.2, %if.else ], [ %gep.1, %if.then ]
-  %v8 = load double, double* %arrayidx, align 8
+  %gep.2.sink = phi ptr [ %gep.2, %if.else ], [ %gep.1, %if.then ]
+  %v8 = load double, ptr %arrayidx, align 8
   %mul16 = fmul double 3.0, %v8
-  store double %mul16, double* %gep.2.sink, align 8
+  store double %mul16, ptr %gep.2.sink, align 8
   %exitcond.not = icmp eq i64 %iv.next, 32000
   br i1 %exitcond.not, label %exit, label %loop.header
 
@@ -72,7 +72,7 @@ exit:                                             ; preds = %loop.latch
   ret i32 10
 }
 
-define i32 @store_with_pointer_phi_runtime_checks(double* %A, double* %B, double* %C) {
+define i32 @store_with_pointer_phi_runtime_checks(ptr %A, ptr %B, ptr %C) {
 ; CHECK-LABEL: @store_with_pointer_phi_runtime_checks
 ; CHECK:     memcheck
 ; CHECK:     vector.body
@@ -84,22 +84,22 @@ loop.header:                                        ; preds = %loop.latch, %entr
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp5 = icmp ult i64 %iv, 15999
-  %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
+  %arrayidx = getelementptr inbounds double, ptr %A, i64 %iv
   br i1 %cmp5, label %if.then, label %if.else
 
 if.then:                                          ; preds = %loop.header
-  %gep.1 = getelementptr inbounds double, double* %B, i64 %iv
+  %gep.1 = getelementptr inbounds double, ptr %B, i64 %iv
   br label %loop.latch
 
 if.else:                                          ; preds = %loop.header
-  %gep.2 = getelementptr inbounds double, double* %C, i64 %iv
+  %gep.2 = getelementptr inbounds double, ptr %C, i64 %iv
   br label %loop.latch
 
 loop.latch:                                          ; preds = %if.else, %if.then
-  %gep.2.sink = phi double* [ %gep.2, %if.else ], [ %gep.1, %if.then ]
-  %v8 = load double, double* %arrayidx, align 8
+  %gep.2.sink = phi ptr [ %gep.2, %if.else ], [ %gep.1, %if.then ]
+  %v8 = load double, ptr %arrayidx, align 8
   %mul16 = fmul double 3.0, %v8
-  store double %mul16, double* %gep.2.sink, align 8
+  store double %mul16, ptr %gep.2.sink, align 8
   %exitcond.not = icmp eq i64 %iv.next, 32000
   br i1 %exitcond.not, label %exit, label %loop.header
 
@@ -107,7 +107,7 @@ exit:                                             ; preds = %loop.latch
   ret i32 10
 }
 
-define i32 @load_with_pointer_phi_outside_loop(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) {
+define i32 @load_with_pointer_phi_outside_loop(ptr %A, ptr %B, ptr %C, i1 %c.0, i1 %c.1) {
 ; CHECK-LABEL: @load_with_pointer_phi_outside_loop
 ; CHECK-NOT: vector.body
 ;
@@ -118,20 +118,20 @@ if.then:
   br label %loop.ph
 
 if.else:
-  %ptr.select = select i1 %c.1, double* %C, double* %B
+  %ptr.select = select i1 %c.1, ptr %C, ptr %B
   br label %loop.ph
 
 loop.ph:
-  %ptr = phi double* [ %A, %if.then ], [ %ptr.select, %if.else ]
+  %ptr = phi ptr [ %A, %if.then ], [ %ptr.select, %if.else ]
   br label %loop.header
 
 loop.header:                                        ; preds = %loop.latch, %entry
   %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.header ]
   %iv.next = add nuw nsw i64 %iv, 1
-  %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
-  %v8 = load double, double* %ptr, align 8
+  %arrayidx = getelementptr inbounds double, ptr %A, i64 %iv
+  %v8 = load double, ptr %ptr, align 8
   %mul16 = fmul double 3.0, %v8
-  store double %mul16, double* %arrayidx, align 8
+  store double %mul16, ptr %arrayidx, align 8
   %exitcond.not = icmp eq i64 %iv.next, 32000
   br i1 %exitcond.not, label %exit, label %loop.header
 
@@ -139,7 +139,7 @@ exit:                                             ; preds = %loop.latch
   ret i32 10
 }
 
-define i32 @store_with_pointer_phi_outside_loop(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) {
+define i32 @store_with_pointer_phi_outside_loop(ptr %A, ptr %B, ptr %C, i1 %c.0, i1 %c.1) {
 ; CHECK-LABEL: @store_with_pointer_phi_outside_loop
 ; CHECK-NOT: vector.body
 ;
@@ -150,20 +150,20 @@ if.then:
   br label %loop.ph
 
 if.else:
-  %ptr.select = select i1 %c.1, double* %C, double* %B
+  %ptr.select = select i1 %c.1, ptr %C, ptr %B
   br label %loop.ph
 
 loop.ph:
-  %ptr = phi double* [ %A, %if.then ], [ %ptr.select, %if.else ]
+  %ptr = phi ptr [ %A, %if.then ], [ %ptr.select, %if.else ]
   br label %loop.header
 
 loop.header:                                        ; preds = %loop.latch, %entry
   %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.header ]
   %iv.next = add nuw nsw i64 %iv, 1
-  %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
-  %v8 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %A, i64 %iv
+  %v8 = load double, ptr %arrayidx, align 8
   %mul16 = fmul double 3.0, %v8
-  store double %mul16, double* %ptr, align 8
+  store double %mul16, ptr %ptr, align 8
   %exitcond.not = icmp eq i64 %iv.next, 32000
   br i1 %exitcond.not, label %exit, label %loop.header
 

diff  --git a/llvm/test/Transforms/LoopVectorize/vectorizeVFone.ll b/llvm/test/Transforms/LoopVectorize/vectorizeVFone.ll
index ea86c410731b8..dcc144e1234af 100644
--- a/llvm/test/Transforms/LoopVectorize/vectorizeVFone.ll
+++ b/llvm/test/Transforms/LoopVectorize/vectorizeVFone.ll
@@ -2,7 +2,7 @@
 
 %type = type { [3 x double] }
 
-define void @getScalarFunc(double* %A, double* %C, %type* %B) {
+define void @getScalarFunc(ptr %A, ptr %C, ptr %B) {
 ; CHECK-LABEL: getScalarFunc
 ; This check will catch also the massv version of the function.
 ; CHECK-NOT: call fast <{{[0-9]+}} x double> @{{.*}}atan(<{{[0-9]+}} x double> %{{[0-9]+}})
@@ -11,9 +11,9 @@ entry:
 
 for.body: 
   %i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %dummyload2 = load double, double* %A, align 8
-  %arrayidx.i24 = getelementptr inbounds %type, %type* %B, i64 %i, i32 0, i32 0
-  %_15 = load double, double* %arrayidx.i24, align 8
+  %dummyload2 = load double, ptr %A, align 8
+  %arrayidx.i24 = getelementptr inbounds %type, ptr %B, i64 %i, i32 0, i32 0
+  %_15 = load double, ptr %arrayidx.i24, align 8
   %call10 = tail call fast double @atan(double %_15) #0
   %inc = add i64 %i, 1
   %cmp = icmp ugt i64 1000, %inc

diff  --git a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
index 994e9428d86c3..7bf4fbd89b0ee 100644
--- a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
@@ -9,9 +9,9 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; memchecks.
 
 ; CHECK-LABEL: test
-define void @test(i32*  %A, i64 %AStride,
-                  i32*  %B, i32 %BStride,
-                  i32*  %C, i64 %CStride, i32 %N) {
+define void @test(ptr  %A, i64 %AStride,
+                  ptr  %B, i32 %BStride,
+                  ptr  %C, i64 %CStride, i32 %N) {
 entry:
   %cmp13 = icmp eq i32 %N, 0
   br i1 %cmp13, label %for.end, label %for.body.preheader
@@ -34,15 +34,15 @@ for.body:
   %iv.trunc = trunc i64 %indvars.iv to i32
   %mul = mul i32 %iv.trunc, %BStride
   %mul64 = zext i32 %mul to i64
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %mul64
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %mul64
+  %0 = load i32, ptr %arrayidx, align 4
   %mul2 = mul nsw i64 %indvars.iv, %CStride
-  %arrayidx3 = getelementptr inbounds i32, i32* %C, i64 %mul2
-  %1 = load i32, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %C, i64 %mul2
+  %1 = load i32, ptr %arrayidx3, align 4
   %mul4 = mul nsw i32 %1, %0
   %mul3 = mul nsw i64 %indvars.iv, %AStride
-  %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %mul3
-  store i32 %mul4, i32* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %A, i64 %mul3
+  store i32 %mul4, ptr %arrayidx7, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %N
@@ -62,7 +62,7 @@ for.end:
 ; CHECK-LABEL: fn1
 ; CHECK: load <2 x double>
 
-define void @fn1(double* noalias %x, double* noalias %c, double %a) {
+define void @fn1(ptr noalias %x, ptr noalias %c, double %a) {
 entry:
   %conv = fptosi double %a to i32
   %conv2 = add i32 %conv, 4
@@ -77,10 +77,10 @@ for.body:
   %0 = trunc i64 %indvars.iv to i32
   %mul = mul nsw i32 %0, %conv
   %idxprom = sext i32 %mul to i64
-  %arrayidx = getelementptr inbounds double, double* %x, i64 %idxprom
-  %1 = load double, double* %arrayidx, align 8
-  %arrayidx3 = getelementptr inbounds double, double* %c, i64 %indvars.iv
-  store double %1, double* %arrayidx3, align 8
+  %arrayidx = getelementptr inbounds double, ptr %x, i64 %idxprom
+  %1 = load double, ptr %arrayidx, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %c, i64 %indvars.iv
+  store double %1, ptr %arrayidx3, align 8
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %conv2

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
index 226af97360af7..a6a9cfa31a6c2 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
@@ -6,7 +6,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Verify that -vplan-print-in-dot-format option works.
 
-define void @print_call_and_memory(i64 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
 ; CHECK:      digraph VPlan {
 ; CHECK-NEXT:  graph [labelloc=t, fontsize=30; label="Vectorization Plan\nInitial VPlan for VF=\{4\},UF\>=1"]
 ; CHECK-NEXT:  node [shape=rect, fontname=Courier, fontsize=30]
@@ -40,11 +40,11 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %y, i64 %iv
-  %lv = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %arrayidx, align 4
   %call = tail call float @llvm.sqrt.f32(float %lv) nounwind readnone
-  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %iv
-  store float %call, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %arrayidx2, align 4
   %iv.next = add i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 2381796dbd19a..a497522498790 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -47,15 +47,15 @@ entry:
 
 outer.header:
   %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ]
-  %gep.1 = getelementptr inbounds [8 x i64], [8 x i64]* @arr2, i64 0, i64 %outer.iv
-  store i64 %outer.iv, i64* %gep.1, align 4
+  %gep.1 = getelementptr inbounds [8 x i64], ptr @arr2, i64 0, i64 %outer.iv
+  store i64 %outer.iv, ptr %gep.1, align 4
   %add = add nsw i64 %outer.iv, %n
   br label %inner
 
 inner:
   %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner ]
-  %gep.2 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arr, i64 0, i64 %inner.iv, i64 %outer.iv
-  store i64 %add, i64* %gep.2, align 4
+  %gep.2 = getelementptr inbounds [8 x [8 x i64]], ptr @arr, i64 0, i64 %inner.iv, i64 %outer.iv
+  store i64 %add, ptr %gep.2, align 4
   %inner.iv.next = add nuw nsw i64 %inner.iv, 1
   %inner.ec = icmp eq i64 %inner.iv.next, 8
   br i1 %inner.ec, label %outer.latch, label %inner

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
index c4e7668a5c6b9..08cba06539a62 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
@@ -5,7 +5,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 ; Make sure recipes with side-effects are not sunk.
-define void @sink_with_sideeffects(i1 %c, i8* %ptr) {
+define void @sink_with_sideeffects(i1 %c, ptr %ptr) {
 ; CHECK-LABEL: sink_with_sideeffects
 ; CHECK:      VPlan 'Initial VPlan for VF={1},UF>=1' {
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
@@ -60,15 +60,15 @@ entry:
 for.body:
   %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ]
   %tmp1 = phi i64 [ %tmp7, %for.inc ], [ 0, %entry ]
-  %tmp2 = getelementptr i8, i8* %ptr, i64 %tmp0
-  %tmp3 = load i8, i8* %tmp2, align 1
-  store i8 0, i8* %tmp2
+  %tmp2 = getelementptr i8, ptr %ptr, i64 %tmp0
+  %tmp3 = load i8, ptr %tmp2, align 1
+  store i8 0, ptr %tmp2
   %tmp4 = zext i8 %tmp3 to i32
   %tmp5 = trunc i32 %tmp4 to i8
   br i1 %c, label %if.then, label %for.inc
 
 if.then:
-  store i8 %tmp5, i8* %tmp2, align 1
+  store i8 %tmp5, ptr %tmp2, align 1
   br label %for.inc
 
 for.inc:

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll b/llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll
index 784c390d039c1..eb3e069e2cf38 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll
@@ -17,17 +17,17 @@ entry:
 
 for.body:                                         ; preds = %for.inc8, %entry
   %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
-  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, i64 %indvars.iv21
+  %arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %indvars.iv21
   %0 = trunc i64 %indvars.iv21 to i32
-  store i32 %0, i32* %arrayidx, align 4
+  store i32 %0, ptr %arrayidx, align 4
   %1 = trunc i64 %indvars.iv21 to i32
   %add = add nsw i32 %1, %n
   br label %for.body3
 
 for.body3:                                        ; preds = %for.body3, %for.body
   %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
-  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
-  store i32 %add, i32* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+  store i32 %add, ptr %arrayidx7, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 8
   br i1 %exitcond, label %for.inc8, label %for.body3

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll
index a35da73553ce4..797a50290be8b 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll
@@ -9,7 +9,7 @@
 ; * Select conditition depending on both outer and inner loop iteration
 ;   variables.
 
-define void @loop_invariant_select(double* noalias nocapture %out, i1 %select, double %a, double %b) {
+define void @loop_invariant_select(ptr noalias nocapture %out, i1 %select, double %a, double %b) {
 ; CHECK-LABEL: @loop_invariant_select(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -22,25 +22,25 @@ define void @loop_invariant_select(double* noalias nocapture %out, i1 %select, d
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], <4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds double, ptr [[OUT:%.*]], <4 x i64> [[VEC_IND]]
 ; CHECK-NEXT:    br label [[FOR2_HEADER1:%.*]]
 ; CHECK:       for2.header1:
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP2:%.*]], [[FOR2_HEADER1]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[SELECT:%.*]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> [[TMP1]], <4 x double*> [[TMP0]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP1]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 entry:
   br label %for1.header
 
 for1.header:
   %indvar1 = phi i64 [ 0, %entry ], [ %indvar11, %for1.latch ]
-  %ptr = getelementptr inbounds double, double* %out, i64 %indvar1
+  %ptr = getelementptr inbounds double, ptr %out, i64 %indvar1
   br label %for2.header
 
 for2.header:
   %indvar2 = phi i64 [ 0, %for1.header ], [ %indvar21, %for2.header ]
   ; Select condition is loop invariant for both inner and outer loop.
   %select.b = select i1 %select, double %a, double %b
-  store double %select.b, double* %ptr, align 8
+  store double %select.b, ptr %ptr, align 8
   %indvar21 = add nuw nsw i64 %indvar2, 1
   %for2.cond = icmp eq i64 %indvar21, 10000
   br i1 %for2.cond, label %for1.latch, label %for2.header
@@ -54,7 +54,7 @@ exit:
   ret void
 }
 
-define void @outer_loop_dependant_select(double* noalias nocapture %out, double %a, double %b) {
+define void @outer_loop_dependant_select(ptr noalias nocapture %out, double %a, double %b) {
 ; CHECK-LABEL: @outer_loop_dependant_select(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -67,19 +67,19 @@ define void @outer_loop_dependant_select(double* noalias nocapture %out, double
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], <4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds double, ptr [[OUT:%.*]], <4 x i64> [[VEC_IND]]
 ; CHECK-NEXT:    br label [[FOR2_HEADER1:%.*]]
 ; CHECK:       for2.header1:
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER1]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> [[TMP2]], <4 x double*> [[TMP0]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 entry:
   br label %for1.header
 
 for1.header:
   %indvar1 = phi i64 [ 0, %entry ], [ %indvar11, %for1.latch ]
-  %ptr = getelementptr inbounds double, double* %out, i64 %indvar1
+  %ptr = getelementptr inbounds double, ptr %out, i64 %indvar1
   br label %for2.header
 
 for2.header:
@@ -87,7 +87,7 @@ for2.header:
   %select = trunc i64 %indvar1 to i1
   ; Select condition only depends on outer loop iteration variable.
   %select.b = select i1 %select, double %a, double %b
-  store double %select.b, double* %ptr, align 8
+  store double %select.b, ptr %ptr, align 8
   %indvar21 = add nuw nsw i64 %indvar2, 1
   %for2.cond = icmp eq i64 %indvar21, 10000
   br i1 %for2.cond, label %for1.latch, label %for2.header
@@ -101,7 +101,7 @@ exit:
   ret void
 }
 
-define void @inner_loop_dependant_select(double* noalias nocapture %out, double %a, double %b) {
+define void @inner_loop_dependant_select(ptr noalias nocapture %out, double %a, double %b) {
 ; CHECK-LABEL: @inner_loop_dependant_select(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -114,19 +114,19 @@ define void @inner_loop_dependant_select(double* noalias nocapture %out, double
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], <4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds double, ptr [[OUT:%.*]], <4 x i64> [[VEC_IND]]
 ; CHECK-NEXT:    br label [[FOR2_HEADER1:%.*]]
 ; CHECK:       for2.header1:
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER1]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i64> [[VEC_PHI]] to <4 x i1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> [[TMP2]], <4 x double*> [[TMP0]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 entry:
   br label %for1.header
 
 for1.header:
   %indvar1 = phi i64 [ 0, %entry ], [ %indvar11, %for1.latch ]
-  %ptr = getelementptr inbounds double, double* %out, i64 %indvar1
+  %ptr = getelementptr inbounds double, ptr %out, i64 %indvar1
   br label %for2.header
 
 for2.header:
@@ -134,7 +134,7 @@ for2.header:
   %select = trunc i64 %indvar2 to i1
   ; Select condition only depends on inner loop iteration variable.
   %select.b = select i1 %select, double %a, double %b
-  store double %select.b, double* %ptr, align 8
+  store double %select.b, ptr %ptr, align 8
   %indvar21 = add nuw nsw i64 %indvar2, 1
   %for2.cond = icmp eq i64 %indvar21, 10000
   br i1 %for2.cond, label %for1.latch, label %for2.header
@@ -148,7 +148,7 @@ exit:
   ret void
 }
 
-define void @outer_and_inner_loop_dependant_select(double* noalias nocapture %out, double %a, double %b) {
+define void @outer_and_inner_loop_dependant_select(ptr noalias nocapture %out, double %a, double %b) {
 ; CHECK-LABEL: @outer_and_inner_loop_dependant_select(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -161,20 +161,20 @@ define void @outer_and_inner_loop_dependant_select(double* noalias nocapture %ou
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], <4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds double, ptr [[OUT:%.*]], <4 x i64> [[VEC_IND]]
 ; CHECK-NEXT:    br label [[FOR2_HEADER1:%.*]]
 ; CHECK:       for2.header1:
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR2_HEADER1]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i1>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> [[TMP3]], <4 x double*> [[TMP0]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP3]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 entry:
   br label %for1.header
 
 for1.header:
   %indvar1 = phi i64 [ 0, %entry ], [ %indvar11, %for1.latch ]
-  %ptr = getelementptr inbounds double, double* %out, i64 %indvar1
+  %ptr = getelementptr inbounds double, ptr %out, i64 %indvar1
   br label %for2.header
 
 for2.header:
@@ -183,7 +183,7 @@ for2.header:
   %select = trunc i64 %sum to i1
   ; Select condition depends on both inner and outer loop iteration variables.
   %select.b = select i1 %select, double %a, double %b
-  store double %select.b, double* %ptr, align 8
+  store double %select.b, ptr %ptr, align 8
   %indvar21 = add nuw nsw i64 %indvar2, 1
   %for2.cond = icmp eq i64 %indvar21, 10000
   br i1 %for2.cond, label %for1.latch, label %for2.header

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
index 7947a49bf0a1e..261b924ba51dc 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
@@ -10,7 +10,7 @@
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-define void @foo(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) {
+define void @foo(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) {
 entry:
   %cmp32 = icmp sgt i32 %N, 0
   br i1 %cmp32, label %outer.ph, label %for.end15
@@ -33,10 +33,10 @@ inner.ph:
 inner.body:
   %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
   %2 = add nsw i64 %indvars.iv, %1
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
-  %3 = load i32, i32* %arrayidx, align 4
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
-  store i32 %3, i32* %arrayidx12, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
+  %3 = load i32, ptr %arrayidx, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
+  store i32 %3, ptr %arrayidx12, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond, label %outer.inc, label %inner.body

diff  --git a/llvm/test/Transforms/LoopVectorize/write-only.ll b/llvm/test/Transforms/LoopVectorize/write-only.ll
index f99aed6ba7418..cc21b94bc3070 100644
--- a/llvm/test/Transforms/LoopVectorize/write-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/write-only.ll
@@ -5,16 +5,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK-LABEL: @read_mod_write_single_ptr(
 ;CHECK: load <4 x float>
 ;CHECK: ret i32
-define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp {
+define i32 @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %3 = load float, float* %2, align 4
+  %2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %3 = load float, ptr %2, align 4
   %4 = fmul float %3, 3.000000e+00
-  store float %4, float* %2, align 4
+  store float %4, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -28,16 +28,16 @@ define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwta
 ; CHECK-LABEL: @read_mod_write_single_ptr_volatile_store(
 ; CHECK-NOT: store <4 x float>
 ; CHECK: ret i32
-define i32 @read_mod_write_single_ptr_volatile_store(float* nocapture %a, i32 %n) nounwind uwtable ssp {
+define i32 @read_mod_write_single_ptr_volatile_store(ptr nocapture %a, i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %3 = load float, float* %2, align 4
+  %2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %3 = load float, ptr %2, align 4
   %4 = fmul float %3, 3.000000e+00
-  store volatile float %4, float* %2, align 4
+  store volatile float %4, ptr %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
