[llvm] b31fffb - [ARM] Convert tests to opaque pointers (NFC)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 5 04:57:57 PST 2024
Author: Nikita Popov
Date: 2024-02-05T13:56:59+01:00
New Revision: b31fffbc7f1e0491bf599e82b7195e320d26e140
URL: https://github.com/llvm/llvm-project/commit/b31fffbc7f1e0491bf599e82b7195e320d26e140
DIFF: https://github.com/llvm/llvm-project/commit/b31fffbc7f1e0491bf599e82b7195e320d26e140.diff
LOG: [ARM] Convert tests to opaque pointers (NFC)
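For context, this is part of the ongoing migration away from typed pointers: pointer-typed values in the test IR are rewritten to use the opaque ptr type, with no intended functional change (NFC). As a representative example taken from the diff below, a typed load such as

    %2 = load %struct.VERTEX*, %struct.VERTEX** %1, align 4

becomes

    %2 = load ptr, ptr %1, align 4

and getelementptr instructions keep only the source element type, e.g. getelementptr %struct.VERTEX, ptr %tree, i32 0, i32 2.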
Added:
Modified:
llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
llvm/test/CodeGen/ARM/Windows/wineh-basic.ll
llvm/test/CodeGen/ARM/aes-erratum-fix.ll
llvm/test/CodeGen/ARM/aliases.ll
llvm/test/CodeGen/ARM/code-placement.ll
llvm/test/CodeGen/ARM/constant-island-movwt.mir
llvm/test/CodeGen/ARM/cortex-a57-misched-basic.ll
llvm/test/CodeGen/ARM/debug-info-blocks.ll
llvm/test/CodeGen/ARM/debug-info-d16-reg.ll
llvm/test/CodeGen/ARM/debug-info-s16-reg.ll
llvm/test/CodeGen/ARM/dwarf-eh.ll
llvm/test/CodeGen/ARM/ldrcppic.ll
llvm/test/CodeGen/ARM/misched-copy-arm.ll
llvm/test/CodeGen/ARM/no-register-coalescing-in-returnsTwice.mir
llvm/test/CodeGen/ARM/readonly-aliases.ll
llvm/test/CodeGen/ARM/tail-dup-kill-flags.ll
llvm/test/CodeGen/Thumb/PR36658.mir
llvm/test/CodeGen/Thumb/branch-to-return.ll
llvm/test/CodeGen/Thumb/tbb-reuse.mir
llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-block-cond-iter-count.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-block-debug.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-revert-placement.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir
llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll
llvm/test/CodeGen/Thumb2/mve-phireg.ll
llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll
llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll
llvm/test/CodeGen/Thumb2/mve-vecreduce-add-combine.ll
llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll
llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll
llvm/test/CodeGen/Thumb2/scavenge-lr.mir
llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 80812a395995c..ce23d0e73282e 100644
--- a/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -11,7 +11,7 @@ bb74.i: ; preds = %bb88.i, %bb74.i, %entry
bb88.i: ; preds = %bb74.i
br i1 false, label %mandel.exit, label %bb74.i
mandel.exit: ; preds = %bb88.i
- %tmp2 = load volatile double, ptr getelementptr ({ double, double }, ptr @accum, i32 0, i32 0), align 8 ; <double> [#uses=1]
+ %tmp2 = load volatile double, ptr @accum, align 8 ; <double> [#uses=1]
%tmp23 = fptosi double %tmp2 to i32 ; <i32> [#uses=1]
%tmp5 = tail call i32 (ptr, ...) @printf( ptr @.str, i32 %tmp23 ) ; <i32> [#uses=0]
ret i32 0
diff --git a/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
index 5fc3f6e80c362..9029f08de5c80 100644
--- a/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
+++ b/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -1,95 +1,95 @@
; RUN: llc -mtriple armv6-apple-darwin10 -mattr=+vfp2 -filetype asm -o - %s | FileCheck %s
-%struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* }
+%struct.EDGE_PAIR = type { ptr, ptr }
%struct.VEC2 = type { double, double, double }
-%struct.VERTEX = type { %struct.VEC2, %struct.VERTEX*, %struct.VERTEX* }
-%struct.edge_rec = type { %struct.VERTEX*, %struct.edge_rec*, i32, i8* }
-@avail_edge = internal global %struct.edge_rec* null
+%struct.VERTEX = type { %struct.VEC2, ptr, ptr }
+%struct.edge_rec = type { ptr, ptr, i32, ptr }
+@avail_edge = internal global ptr null
@_2E_str7 = internal constant [21 x i8] c"ERROR: Only 1 point!\00", section "__TEXT,__cstring,cstring_literals", align 1
-@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.EDGE_PAIR*, %struct.VERTEX*, %struct.VERTEX*)* @build_delaunay to i8*)], section "llvm.metadata"
+@llvm.used = appending global [1 x ptr] [ptr @build_delaunay], section "llvm.metadata"
-define void @build_delaunay(%struct.EDGE_PAIR* noalias nocapture sret(%struct.EDGE_PAIR) %agg.result, %struct.VERTEX* %tree, %struct.VERTEX* %extra) nounwind {
+define void @build_delaunay(ptr noalias nocapture sret(%struct.EDGE_PAIR) %agg.result, ptr %tree, ptr %extra) nounwind {
entry:
%delright = alloca %struct.EDGE_PAIR, align 8
%delleft = alloca %struct.EDGE_PAIR, align 8
- %0 = icmp eq %struct.VERTEX* %tree, null
+ %0 = icmp eq ptr %tree, null
br i1 %0, label %bb8, label %bb
bb:
- %1 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 2
- %2 = load %struct.VERTEX*, %struct.VERTEX** %1, align 4
- %3 = icmp eq %struct.VERTEX* %2, null
+ %1 = getelementptr %struct.VERTEX, ptr %tree, i32 0, i32 2
+ %2 = load ptr, ptr %1, align 4
+ %3 = icmp eq ptr %2, null
br i1 %3, label %bb7, label %bb1.i
bb1.i:
- %tree_addr.0.i = phi %struct.VERTEX* [ %5, %bb1.i ], [ %tree, %bb ]
- %4 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1
- %5 = load %struct.VERTEX*, %struct.VERTEX** %4, align 4
- %6 = icmp eq %struct.VERTEX* %5, null
+ %tree_addr.0.i = phi ptr [ %5, %bb1.i ], [ %tree, %bb ]
+ %4 = getelementptr %struct.VERTEX, ptr %tree_addr.0.i, i32 0, i32 1
+ %5 = load ptr, ptr %4, align 4
+ %6 = icmp eq ptr %5, null
br i1 %6, label %get_low.exit, label %bb1.i
get_low.exit:
- call void @build_delaunay(%struct.EDGE_PAIR* noalias sret(%struct.EDGE_PAIR) %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind
- %7 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 1
- %8 = load %struct.VERTEX*, %struct.VERTEX** %7, align 4
- call void @build_delaunay(%struct.EDGE_PAIR* noalias sret(%struct.EDGE_PAIR) %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind
- %9 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delleft, i32 0, i32 0
- %10 = load %struct.edge_rec*, %struct.edge_rec** %9, align 8
- %11 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delleft, i32 0, i32 1
- %12 = load %struct.edge_rec*, %struct.edge_rec** %11, align 4
- %13 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delright, i32 0, i32 0
- %14 = load %struct.edge_rec*, %struct.edge_rec** %13, align 8
- %15 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delright, i32 0, i32 1
- %16 = load %struct.edge_rec*, %struct.edge_rec** %15, align 4
+ call void @build_delaunay(ptr noalias sret(%struct.EDGE_PAIR) %delright, ptr %2, ptr %extra) nounwind
+ %7 = getelementptr %struct.VERTEX, ptr %tree, i32 0, i32 1
+ %8 = load ptr, ptr %7, align 4
+ call void @build_delaunay(ptr noalias sret(%struct.EDGE_PAIR) %delleft, ptr %8, ptr %tree) nounwind
+ %9 = getelementptr %struct.EDGE_PAIR, ptr %delleft, i32 0, i32 0
+ %10 = load ptr, ptr %9, align 8
+ %11 = getelementptr %struct.EDGE_PAIR, ptr %delleft, i32 0, i32 1
+ %12 = load ptr, ptr %11, align 4
+ %13 = getelementptr %struct.EDGE_PAIR, ptr %delright, i32 0, i32 0
+ %14 = load ptr, ptr %13, align 8
+ %15 = getelementptr %struct.EDGE_PAIR, ptr %delright, i32 0, i32 1
+ %16 = load ptr, ptr %15, align 4
br label %bb.i
bb.i:
- %rdi_addr.0.i = phi %struct.edge_rec* [ %14, %get_low.exit ], [ %72, %bb4.i ]
- %ldi_addr.1.i = phi %struct.edge_rec* [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ]
- %17 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0
- %18 = load %struct.VERTEX*, %struct.VERTEX** %17, align 4
- %19 = ptrtoint %struct.edge_rec* %ldi_addr.1.i to i32
- %20 = getelementptr %struct.VERTEX, %struct.VERTEX* %18, i32 0, i32 0, i32 0
- %21 = load double, double* %20, align 4
- %22 = getelementptr %struct.VERTEX, %struct.VERTEX* %18, i32 0, i32 0, i32 1
- %23 = load double, double* %22, align 4
+ %rdi_addr.0.i = phi ptr [ %14, %get_low.exit ], [ %72, %bb4.i ]
+ %ldi_addr.1.i = phi ptr [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ]
+ %17 = getelementptr %struct.edge_rec, ptr %rdi_addr.0.i, i32 0, i32 0
+ %18 = load ptr, ptr %17, align 4
+ %19 = ptrtoint ptr %ldi_addr.1.i to i32
+ %20 = getelementptr %struct.VERTEX, ptr %18, i32 0, i32 0, i32 0
+ %21 = load double, ptr %20, align 4
+ %22 = getelementptr %struct.VERTEX, ptr %18, i32 0, i32 0, i32 1
+ %23 = load double, ptr %22, align 4
br label %bb2.i
bb1.i1:
- %24 = ptrtoint %struct.edge_rec* %ldi_addr.0.i to i32
+ %24 = ptrtoint ptr %ldi_addr.0.i to i32
%25 = add i32 %24, 48
%26 = and i32 %25, 63
%27 = and i32 %24, -64
%28 = or i32 %26, %27
- %29 = inttoptr i32 %28 to %struct.edge_rec*
- %30 = getelementptr %struct.edge_rec, %struct.edge_rec* %29, i32 0, i32 1
- %31 = load %struct.edge_rec*, %struct.edge_rec** %30, align 4
- %32 = ptrtoint %struct.edge_rec* %31 to i32
+ %29 = inttoptr i32 %28 to ptr
+ %30 = getelementptr %struct.edge_rec, ptr %29, i32 0, i32 1
+ %31 = load ptr, ptr %30, align 4
+ %32 = ptrtoint ptr %31 to i32
%33 = add i32 %32, 16
%34 = and i32 %33, 63
%35 = and i32 %32, -64
%36 = or i32 %34, %35
- %37 = inttoptr i32 %36 to %struct.edge_rec*
+ %37 = inttoptr i32 %36 to ptr
br label %bb2.i
bb2.i:
- %ldi_addr.1.pn.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ]
+ %ldi_addr.1.pn.i = phi ptr [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ]
%.pn6.in.in.i = phi i32 [ %19, %bb.i ], [ %36, %bb1.i1 ]
- %ldi_addr.0.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ]
+ %ldi_addr.0.i = phi ptr [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ]
%.pn6.in.i = xor i32 %.pn6.in.in.i, 32
- %.pn6.i = inttoptr i32 %.pn6.in.i to %struct.edge_rec*
- %t1.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0
- %t2.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn6.i, i32 0, i32 0
- %t1.0.i = load %struct.VERTEX*, %struct.VERTEX** %t1.0.in.i
- %t2.0.i = load %struct.VERTEX*, %struct.VERTEX** %t2.0.in.i
- %38 = getelementptr %struct.VERTEX, %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0
- %39 = load double, double* %38, align 4
- %40 = getelementptr %struct.VERTEX, %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1
- %41 = load double, double* %40, align 4
- %42 = getelementptr %struct.VERTEX, %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0
- %43 = load double, double* %42, align 4
- %44 = getelementptr %struct.VERTEX, %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1
- %45 = load double, double* %44, align 4
+ %.pn6.i = inttoptr i32 %.pn6.in.i to ptr
+ %t1.0.in.i = getelementptr %struct.edge_rec, ptr %ldi_addr.1.pn.i, i32 0, i32 0
+ %t2.0.in.i = getelementptr %struct.edge_rec, ptr %.pn6.i, i32 0, i32 0
+ %t1.0.i = load ptr, ptr %t1.0.in.i
+ %t2.0.i = load ptr, ptr %t2.0.in.i
+ %38 = getelementptr %struct.VERTEX, ptr %t1.0.i, i32 0, i32 0, i32 0
+ %39 = load double, ptr %38, align 4
+ %40 = getelementptr %struct.VERTEX, ptr %t1.0.i, i32 0, i32 0, i32 1
+ %41 = load double, ptr %40, align 4
+ %42 = getelementptr %struct.VERTEX, ptr %t2.0.i, i32 0, i32 0, i32 0
+ %43 = load double, ptr %42, align 4
+ %44 = getelementptr %struct.VERTEX, ptr %t2.0.i, i32 0, i32 0, i32 1
+ %45 = load double, ptr %44, align 4
%46 = fsub double %39, %21
%47 = fsub double %45, %23
%48 = fmul double %46, %47
@@ -101,15 +101,15 @@ bb2.i:
br i1 %53, label %bb1.i1, label %bb3.i
bb3.i:
- %54 = ptrtoint %struct.edge_rec* %rdi_addr.0.i to i32
+ %54 = ptrtoint ptr %rdi_addr.0.i to i32
%55 = xor i32 %54, 32
- %56 = inttoptr i32 %55 to %struct.edge_rec*
- %57 = getelementptr %struct.edge_rec, %struct.edge_rec* %56, i32 0, i32 0
- %58 = load %struct.VERTEX*, %struct.VERTEX** %57, align 4
- %59 = getelementptr %struct.VERTEX, %struct.VERTEX* %58, i32 0, i32 0, i32 0
- %60 = load double, double* %59, align 4
- %61 = getelementptr %struct.VERTEX, %struct.VERTEX* %58, i32 0, i32 0, i32 1
- %62 = load double, double* %61, align 4
+ %56 = inttoptr i32 %55 to ptr
+ %57 = getelementptr %struct.edge_rec, ptr %56, i32 0, i32 0
+ %58 = load ptr, ptr %57, align 4
+ %59 = getelementptr %struct.VERTEX, ptr %58, i32 0, i32 0, i32 0
+ %60 = load double, ptr %59, align 4
+ %61 = getelementptr %struct.VERTEX, ptr %58, i32 0, i32 0, i32 1
+ %62 = load double, ptr %61, align 4
%63 = fsub double %60, %39
%64 = fsub double %23, %41
%65 = fmul double %63, %64
@@ -121,8 +121,8 @@ bb3.i:
br i1 %70, label %bb4.i, label %bb5.i
bb4.i:
- %71 = getelementptr %struct.edge_rec, %struct.edge_rec* %56, i32 0, i32 1
- %72 = load %struct.edge_rec*, %struct.edge_rec** %71, align 4
+ %71 = getelementptr %struct.edge_rec, ptr %56, i32 0, i32 1
+ %72 = load ptr, ptr %71, align 4
br label %bb.i
bb5.i:
@@ -130,145 +130,145 @@ bb5.i:
%74 = and i32 %73, 63
%75 = and i32 %55, -64
%76 = or i32 %74, %75
- %77 = inttoptr i32 %76 to %struct.edge_rec*
- %78 = getelementptr %struct.edge_rec, %struct.edge_rec* %77, i32 0, i32 1
- %79 = load %struct.edge_rec*, %struct.edge_rec** %78, align 4
- %80 = ptrtoint %struct.edge_rec* %79 to i32
+ %77 = inttoptr i32 %76 to ptr
+ %78 = getelementptr %struct.edge_rec, ptr %77, i32 0, i32 1
+ %79 = load ptr, ptr %78, align 4
+ %80 = ptrtoint ptr %79 to i32
%81 = add i32 %80, 16
%82 = and i32 %81, 63
%83 = and i32 %80, -64
%84 = or i32 %82, %83
- %85 = inttoptr i32 %84 to %struct.edge_rec*
- %86 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0
- %87 = load %struct.VERTEX*, %struct.VERTEX** %86, align 4
- %88 = call %struct.edge_rec* @alloc_edge() nounwind
- %89 = getelementptr %struct.edge_rec, %struct.edge_rec* %88, i32 0, i32 1
- store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4
- %90 = getelementptr %struct.edge_rec, %struct.edge_rec* %88, i32 0, i32 0
- store %struct.VERTEX* %18, %struct.VERTEX** %90, align 4
- %91 = ptrtoint %struct.edge_rec* %88 to i32
+ %85 = inttoptr i32 %84 to ptr
+ %86 = getelementptr %struct.edge_rec, ptr %ldi_addr.0.i, i32 0, i32 0
+ %87 = load ptr, ptr %86, align 4
+ %88 = call ptr @alloc_edge() nounwind
+ %89 = getelementptr %struct.edge_rec, ptr %88, i32 0, i32 1
+ store ptr %88, ptr %89, align 4
+ %90 = getelementptr %struct.edge_rec, ptr %88, i32 0, i32 0
+ store ptr %18, ptr %90, align 4
+ %91 = ptrtoint ptr %88 to i32
%92 = add i32 %91, 16
- %93 = inttoptr i32 %92 to %struct.edge_rec*
+ %93 = inttoptr i32 %92 to ptr
%94 = add i32 %91, 48
- %95 = inttoptr i32 %94 to %struct.edge_rec*
- %96 = getelementptr %struct.edge_rec, %struct.edge_rec* %93, i32 0, i32 1
- store %struct.edge_rec* %95, %struct.edge_rec** %96, align 4
+ %95 = inttoptr i32 %94 to ptr
+ %96 = getelementptr %struct.edge_rec, ptr %93, i32 0, i32 1
+ store ptr %95, ptr %96, align 4
%97 = add i32 %91, 32
- %98 = inttoptr i32 %97 to %struct.edge_rec*
- %99 = getelementptr %struct.edge_rec, %struct.edge_rec* %98, i32 0, i32 1
- store %struct.edge_rec* %98, %struct.edge_rec** %99, align 4
- %100 = getelementptr %struct.edge_rec, %struct.edge_rec* %98, i32 0, i32 0
- store %struct.VERTEX* %87, %struct.VERTEX** %100, align 4
- %101 = getelementptr %struct.edge_rec, %struct.edge_rec* %95, i32 0, i32 1
- store %struct.edge_rec* %93, %struct.edge_rec** %101, align 4
- %102 = load %struct.edge_rec*, %struct.edge_rec** %89, align 4
- %103 = ptrtoint %struct.edge_rec* %102 to i32
+ %98 = inttoptr i32 %97 to ptr
+ %99 = getelementptr %struct.edge_rec, ptr %98, i32 0, i32 1
+ store ptr %98, ptr %99, align 4
+ %100 = getelementptr %struct.edge_rec, ptr %98, i32 0, i32 0
+ store ptr %87, ptr %100, align 4
+ %101 = getelementptr %struct.edge_rec, ptr %95, i32 0, i32 1
+ store ptr %93, ptr %101, align 4
+ %102 = load ptr, ptr %89, align 4
+ %103 = ptrtoint ptr %102 to i32
%104 = add i32 %103, 16
%105 = and i32 %104, 63
%106 = and i32 %103, -64
%107 = or i32 %105, %106
- %108 = inttoptr i32 %107 to %struct.edge_rec*
- %109 = getelementptr %struct.edge_rec, %struct.edge_rec* %85, i32 0, i32 1
- %110 = load %struct.edge_rec*, %struct.edge_rec** %109, align 4
- %111 = ptrtoint %struct.edge_rec* %110 to i32
+ %108 = inttoptr i32 %107 to ptr
+ %109 = getelementptr %struct.edge_rec, ptr %85, i32 0, i32 1
+ %110 = load ptr, ptr %109, align 4
+ %111 = ptrtoint ptr %110 to i32
%112 = add i32 %111, 16
%113 = and i32 %112, 63
%114 = and i32 %111, -64
%115 = or i32 %113, %114
- %116 = inttoptr i32 %115 to %struct.edge_rec*
- %117 = getelementptr %struct.edge_rec, %struct.edge_rec* %116, i32 0, i32 1
- %118 = load %struct.edge_rec*, %struct.edge_rec** %117, align 4
- %119 = getelementptr %struct.edge_rec, %struct.edge_rec* %108, i32 0, i32 1
- %120 = load %struct.edge_rec*, %struct.edge_rec** %119, align 4
- store %struct.edge_rec* %118, %struct.edge_rec** %119, align 4
- store %struct.edge_rec* %120, %struct.edge_rec** %117, align 4
- %121 = load %struct.edge_rec*, %struct.edge_rec** %89, align 4
- %122 = load %struct.edge_rec*, %struct.edge_rec** %109, align 4
- store %struct.edge_rec* %121, %struct.edge_rec** %109, align 4
- store %struct.edge_rec* %122, %struct.edge_rec** %89, align 4
+ %116 = inttoptr i32 %115 to ptr
+ %117 = getelementptr %struct.edge_rec, ptr %116, i32 0, i32 1
+ %118 = load ptr, ptr %117, align 4
+ %119 = getelementptr %struct.edge_rec, ptr %108, i32 0, i32 1
+ %120 = load ptr, ptr %119, align 4
+ store ptr %118, ptr %119, align 4
+ store ptr %120, ptr %117, align 4
+ %121 = load ptr, ptr %89, align 4
+ %122 = load ptr, ptr %109, align 4
+ store ptr %121, ptr %109, align 4
+ store ptr %122, ptr %89, align 4
%123 = xor i32 %91, 32
- %124 = inttoptr i32 %123 to %struct.edge_rec*
- %125 = getelementptr %struct.edge_rec, %struct.edge_rec* %124, i32 0, i32 1
- %126 = load %struct.edge_rec*, %struct.edge_rec** %125, align 4
- %127 = ptrtoint %struct.edge_rec* %126 to i32
+ %124 = inttoptr i32 %123 to ptr
+ %125 = getelementptr %struct.edge_rec, ptr %124, i32 0, i32 1
+ %126 = load ptr, ptr %125, align 4
+ %127 = ptrtoint ptr %126 to i32
%128 = add i32 %127, 16
%129 = and i32 %128, 63
%130 = and i32 %127, -64
%131 = or i32 %129, %130
- %132 = inttoptr i32 %131 to %struct.edge_rec*
- %133 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1
- %134 = load %struct.edge_rec*, %struct.edge_rec** %133, align 4
- %135 = ptrtoint %struct.edge_rec* %134 to i32
+ %132 = inttoptr i32 %131 to ptr
+ %133 = getelementptr %struct.edge_rec, ptr %ldi_addr.0.i, i32 0, i32 1
+ %134 = load ptr, ptr %133, align 4
+ %135 = ptrtoint ptr %134 to i32
%136 = add i32 %135, 16
%137 = and i32 %136, 63
%138 = and i32 %135, -64
%139 = or i32 %137, %138
- %140 = inttoptr i32 %139 to %struct.edge_rec*
- %141 = getelementptr %struct.edge_rec, %struct.edge_rec* %140, i32 0, i32 1
- %142 = load %struct.edge_rec*, %struct.edge_rec** %141, align 4
- %143 = getelementptr %struct.edge_rec, %struct.edge_rec* %132, i32 0, i32 1
- %144 = load %struct.edge_rec*, %struct.edge_rec** %143, align 4
- store %struct.edge_rec* %142, %struct.edge_rec** %143, align 4
- store %struct.edge_rec* %144, %struct.edge_rec** %141, align 4
- %145 = load %struct.edge_rec*, %struct.edge_rec** %125, align 4
- %146 = load %struct.edge_rec*, %struct.edge_rec** %133, align 4
- store %struct.edge_rec* %145, %struct.edge_rec** %133, align 4
- store %struct.edge_rec* %146, %struct.edge_rec** %125, align 4
+ %140 = inttoptr i32 %139 to ptr
+ %141 = getelementptr %struct.edge_rec, ptr %140, i32 0, i32 1
+ %142 = load ptr, ptr %141, align 4
+ %143 = getelementptr %struct.edge_rec, ptr %132, i32 0, i32 1
+ %144 = load ptr, ptr %143, align 4
+ store ptr %142, ptr %143, align 4
+ store ptr %144, ptr %141, align 4
+ %145 = load ptr, ptr %125, align 4
+ %146 = load ptr, ptr %133, align 4
+ store ptr %145, ptr %133, align 4
+ store ptr %146, ptr %125, align 4
%147 = and i32 %92, 63
%148 = and i32 %91, -64
%149 = or i32 %147, %148
- %150 = inttoptr i32 %149 to %struct.edge_rec*
- %151 = getelementptr %struct.edge_rec, %struct.edge_rec* %150, i32 0, i32 1
- %152 = load %struct.edge_rec*, %struct.edge_rec** %151, align 4
- %153 = ptrtoint %struct.edge_rec* %152 to i32
+ %150 = inttoptr i32 %149 to ptr
+ %151 = getelementptr %struct.edge_rec, ptr %150, i32 0, i32 1
+ %152 = load ptr, ptr %151, align 4
+ %153 = ptrtoint ptr %152 to i32
%154 = add i32 %153, 16
%155 = and i32 %154, 63
%156 = and i32 %153, -64
%157 = or i32 %155, %156
- %158 = inttoptr i32 %157 to %struct.edge_rec*
- %159 = load %struct.VERTEX*, %struct.VERTEX** %90, align 4
- %160 = getelementptr %struct.edge_rec, %struct.edge_rec* %124, i32 0, i32 0
- %161 = load %struct.VERTEX*, %struct.VERTEX** %160, align 4
- %162 = getelementptr %struct.edge_rec, %struct.edge_rec* %16, i32 0, i32 0
- %163 = load %struct.VERTEX*, %struct.VERTEX** %162, align 4
- %164 = icmp eq %struct.VERTEX* %163, %159
- %rdo_addr.0.i = select i1 %164, %struct.edge_rec* %88, %struct.edge_rec* %16
- %165 = getelementptr %struct.edge_rec, %struct.edge_rec* %10, i32 0, i32 0
- %166 = load %struct.VERTEX*, %struct.VERTEX** %165, align 4
- %167 = icmp eq %struct.VERTEX* %166, %161
- %ldo_addr.0.ph.i = select i1 %167, %struct.edge_rec* %124, %struct.edge_rec* %10
+ %158 = inttoptr i32 %157 to ptr
+ %159 = load ptr, ptr %90, align 4
+ %160 = getelementptr %struct.edge_rec, ptr %124, i32 0, i32 0
+ %161 = load ptr, ptr %160, align 4
+ %162 = getelementptr %struct.edge_rec, ptr %16, i32 0, i32 0
+ %163 = load ptr, ptr %162, align 4
+ %164 = icmp eq ptr %163, %159
+ %rdo_addr.0.i = select i1 %164, ptr %88, ptr %16
+ %165 = getelementptr %struct.edge_rec, ptr %10, i32 0, i32 0
+ %166 = load ptr, ptr %165, align 4
+ %167 = icmp eq ptr %166, %161
+ %ldo_addr.0.ph.i = select i1 %167, ptr %124, ptr %10
br label %bb9.i
bb9.i:
- %lcand.2.i = phi %struct.edge_rec* [ %146, %bb5.i ], [ %lcand.1.i, %bb24.i ], [ %739, %bb25.i ]
- %rcand.2.i = phi %struct.edge_rec* [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ]
- %basel.0.i = phi %struct.edge_rec* [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ]
- %168 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.2.i, i32 0, i32 1
- %169 = load %struct.edge_rec*, %struct.edge_rec** %168, align 4
- %170 = getelementptr %struct.edge_rec, %struct.edge_rec* %basel.0.i, i32 0, i32 0
- %171 = load %struct.VERTEX*, %struct.VERTEX** %170, align 4
- %172 = ptrtoint %struct.edge_rec* %basel.0.i to i32
+ %lcand.2.i = phi ptr [ %146, %bb5.i ], [ %lcand.1.i, %bb24.i ], [ %739, %bb25.i ]
+ %rcand.2.i = phi ptr [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ]
+ %basel.0.i = phi ptr [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ]
+ %168 = getelementptr %struct.edge_rec, ptr %lcand.2.i, i32 0, i32 1
+ %169 = load ptr, ptr %168, align 4
+ %170 = getelementptr %struct.edge_rec, ptr %basel.0.i, i32 0, i32 0
+ %171 = load ptr, ptr %170, align 4
+ %172 = ptrtoint ptr %basel.0.i to i32
%173 = xor i32 %172, 32
- %174 = inttoptr i32 %173 to %struct.edge_rec*
- %175 = getelementptr %struct.edge_rec, %struct.edge_rec* %174, i32 0, i32 0
- %176 = load %struct.VERTEX*, %struct.VERTEX** %175, align 4
- %177 = ptrtoint %struct.edge_rec* %169 to i32
+ %174 = inttoptr i32 %173 to ptr
+ %175 = getelementptr %struct.edge_rec, ptr %174, i32 0, i32 0
+ %176 = load ptr, ptr %175, align 4
+ %177 = ptrtoint ptr %169 to i32
%178 = xor i32 %177, 32
- %179 = inttoptr i32 %178 to %struct.edge_rec*
- %180 = getelementptr %struct.edge_rec, %struct.edge_rec* %179, i32 0, i32 0
- %181 = load %struct.VERTEX*, %struct.VERTEX** %180, align 4
- %182 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 0
- %183 = load double, double* %182, align 4
- %184 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 1
- %185 = load double, double* %184, align 4
- %186 = getelementptr %struct.VERTEX, %struct.VERTEX* %181, i32 0, i32 0, i32 0
- %187 = load double, double* %186, align 4
- %188 = getelementptr %struct.VERTEX, %struct.VERTEX* %181, i32 0, i32 0, i32 1
- %189 = load double, double* %188, align 4
- %190 = getelementptr %struct.VERTEX, %struct.VERTEX* %176, i32 0, i32 0, i32 0
- %191 = load double, double* %190, align 4
- %192 = getelementptr %struct.VERTEX, %struct.VERTEX* %176, i32 0, i32 0, i32 1
- %193 = load double, double* %192, align 4
+ %179 = inttoptr i32 %178 to ptr
+ %180 = getelementptr %struct.edge_rec, ptr %179, i32 0, i32 0
+ %181 = load ptr, ptr %180, align 4
+ %182 = getelementptr %struct.VERTEX, ptr %171, i32 0, i32 0, i32 0
+ %183 = load double, ptr %182, align 4
+ %184 = getelementptr %struct.VERTEX, ptr %171, i32 0, i32 0, i32 1
+ %185 = load double, ptr %184, align 4
+ %186 = getelementptr %struct.VERTEX, ptr %181, i32 0, i32 0, i32 0
+ %187 = load double, ptr %186, align 4
+ %188 = getelementptr %struct.VERTEX, ptr %181, i32 0, i32 0, i32 1
+ %189 = load double, ptr %188, align 4
+ %190 = getelementptr %struct.VERTEX, ptr %176, i32 0, i32 0, i32 0
+ %191 = load double, ptr %190, align 4
+ %192 = getelementptr %struct.VERTEX, ptr %176, i32 0, i32 0, i32 1
+ %193 = load double, ptr %192, align 4
%194 = fsub double %183, %191
%195 = fsub double %189, %193
%196 = fmul double %194, %195
@@ -280,146 +280,146 @@ bb9.i:
br i1 %201, label %bb10.i, label %bb13.i
bb10.i:
- %202 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 2
- %avail_edge.promoted25 = load %struct.edge_rec*, %struct.edge_rec** @avail_edge
+ %202 = getelementptr %struct.VERTEX, ptr %171, i32 0, i32 0, i32 2
+ %avail_edge.promoted25 = load ptr, ptr @avail_edge
br label %bb12.i
bb11.i:
- %203 = ptrtoint %struct.edge_rec* %lcand.0.i to i32
+ %203 = ptrtoint ptr %lcand.0.i to i32
%204 = add i32 %203, 16
%205 = and i32 %204, 63
%206 = and i32 %203, -64
%207 = or i32 %205, %206
- %208 = inttoptr i32 %207 to %struct.edge_rec*
- %209 = getelementptr %struct.edge_rec, %struct.edge_rec* %208, i32 0, i32 1
- %210 = load %struct.edge_rec*, %struct.edge_rec** %209, align 4
- %211 = ptrtoint %struct.edge_rec* %210 to i32
+ %208 = inttoptr i32 %207 to ptr
+ %209 = getelementptr %struct.edge_rec, ptr %208, i32 0, i32 1
+ %210 = load ptr, ptr %209, align 4
+ %211 = ptrtoint ptr %210 to i32
%212 = add i32 %211, 16
%213 = and i32 %212, 63
%214 = and i32 %211, -64
%215 = or i32 %213, %214
- %216 = inttoptr i32 %215 to %struct.edge_rec*
- %217 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.0.i, i32 0, i32 1
- %218 = load %struct.edge_rec*, %struct.edge_rec** %217, align 4
- %219 = ptrtoint %struct.edge_rec* %218 to i32
+ %216 = inttoptr i32 %215 to ptr
+ %217 = getelementptr %struct.edge_rec, ptr %lcand.0.i, i32 0, i32 1
+ %218 = load ptr, ptr %217, align 4
+ %219 = ptrtoint ptr %218 to i32
%220 = add i32 %219, 16
%221 = and i32 %220, 63
%222 = and i32 %219, -64
%223 = or i32 %221, %222
- %224 = inttoptr i32 %223 to %struct.edge_rec*
- %225 = getelementptr %struct.edge_rec, %struct.edge_rec* %216, i32 0, i32 1
- %226 = load %struct.edge_rec*, %struct.edge_rec** %225, align 4
- %227 = ptrtoint %struct.edge_rec* %226 to i32
+ %224 = inttoptr i32 %223 to ptr
+ %225 = getelementptr %struct.edge_rec, ptr %216, i32 0, i32 1
+ %226 = load ptr, ptr %225, align 4
+ %227 = ptrtoint ptr %226 to i32
%228 = add i32 %227, 16
%229 = and i32 %228, 63
%230 = and i32 %227, -64
%231 = or i32 %229, %230
- %232 = inttoptr i32 %231 to %struct.edge_rec*
- %233 = getelementptr %struct.edge_rec, %struct.edge_rec* %232, i32 0, i32 1
- %234 = load %struct.edge_rec*, %struct.edge_rec** %233, align 4
- %235 = getelementptr %struct.edge_rec, %struct.edge_rec* %224, i32 0, i32 1
- %236 = load %struct.edge_rec*, %struct.edge_rec** %235, align 4
- store %struct.edge_rec* %234, %struct.edge_rec** %235, align 4
- store %struct.edge_rec* %236, %struct.edge_rec** %233, align 4
- %237 = load %struct.edge_rec*, %struct.edge_rec** %217, align 4
- %238 = load %struct.edge_rec*, %struct.edge_rec** %225, align 4
- store %struct.edge_rec* %237, %struct.edge_rec** %225, align 4
- store %struct.edge_rec* %238, %struct.edge_rec** %217, align 4
+ %232 = inttoptr i32 %231 to ptr
+ %233 = getelementptr %struct.edge_rec, ptr %232, i32 0, i32 1
+ %234 = load ptr, ptr %233, align 4
+ %235 = getelementptr %struct.edge_rec, ptr %224, i32 0, i32 1
+ %236 = load ptr, ptr %235, align 4
+ store ptr %234, ptr %235, align 4
+ store ptr %236, ptr %233, align 4
+ %237 = load ptr, ptr %217, align 4
+ %238 = load ptr, ptr %225, align 4
+ store ptr %237, ptr %225, align 4
+ store ptr %238, ptr %217, align 4
%239 = xor i32 %203, 32
%240 = add i32 %239, 16
%241 = and i32 %240, 63
%242 = or i32 %241, %206
- %243 = inttoptr i32 %242 to %struct.edge_rec*
- %244 = getelementptr %struct.edge_rec, %struct.edge_rec* %243, i32 0, i32 1
- %245 = load %struct.edge_rec*, %struct.edge_rec** %244, align 4
- %246 = ptrtoint %struct.edge_rec* %245 to i32
+ %243 = inttoptr i32 %242 to ptr
+ %244 = getelementptr %struct.edge_rec, ptr %243, i32 0, i32 1
+ %245 = load ptr, ptr %244, align 4
+ %246 = ptrtoint ptr %245 to i32
%247 = add i32 %246, 16
%248 = and i32 %247, 63
%249 = and i32 %246, -64
%250 = or i32 %248, %249
- %251 = inttoptr i32 %250 to %struct.edge_rec*
- %252 = inttoptr i32 %239 to %struct.edge_rec*
- %253 = getelementptr %struct.edge_rec, %struct.edge_rec* %252, i32 0, i32 1
- %254 = load %struct.edge_rec*, %struct.edge_rec** %253, align 4
- %255 = ptrtoint %struct.edge_rec* %254 to i32
+ %251 = inttoptr i32 %250 to ptr
+ %252 = inttoptr i32 %239 to ptr
+ %253 = getelementptr %struct.edge_rec, ptr %252, i32 0, i32 1
+ %254 = load ptr, ptr %253, align 4
+ %255 = ptrtoint ptr %254 to i32
%256 = add i32 %255, 16
%257 = and i32 %256, 63
%258 = and i32 %255, -64
%259 = or i32 %257, %258
- %260 = inttoptr i32 %259 to %struct.edge_rec*
- %261 = getelementptr %struct.edge_rec, %struct.edge_rec* %251, i32 0, i32 1
- %262 = load %struct.edge_rec*, %struct.edge_rec** %261, align 4
- %263 = ptrtoint %struct.edge_rec* %262 to i32
+ %260 = inttoptr i32 %259 to ptr
+ %261 = getelementptr %struct.edge_rec, ptr %251, i32 0, i32 1
+ %262 = load ptr, ptr %261, align 4
+ %263 = ptrtoint ptr %262 to i32
%264 = add i32 %263, 16
%265 = and i32 %264, 63
%266 = and i32 %263, -64
%267 = or i32 %265, %266
- %268 = inttoptr i32 %267 to %struct.edge_rec*
- %269 = getelementptr %struct.edge_rec, %struct.edge_rec* %268, i32 0, i32 1
- %270 = load %struct.edge_rec*, %struct.edge_rec** %269, align 4
- %271 = getelementptr %struct.edge_rec, %struct.edge_rec* %260, i32 0, i32 1
- %272 = load %struct.edge_rec*, %struct.edge_rec** %271, align 4
- store %struct.edge_rec* %270, %struct.edge_rec** %271, align 4
- store %struct.edge_rec* %272, %struct.edge_rec** %269, align 4
- %273 = load %struct.edge_rec*, %struct.edge_rec** %253, align 4
- %274 = load %struct.edge_rec*, %struct.edge_rec** %261, align 4
- store %struct.edge_rec* %273, %struct.edge_rec** %261, align 4
- store %struct.edge_rec* %274, %struct.edge_rec** %253, align 4
- %275 = inttoptr i32 %206 to %struct.edge_rec*
- %276 = getelementptr %struct.edge_rec, %struct.edge_rec* %275, i32 0, i32 1
- store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** %276, align 4
- %277 = getelementptr %struct.edge_rec, %struct.edge_rec* %t.0.i, i32 0, i32 1
- %278 = load %struct.edge_rec*, %struct.edge_rec** %277, align 4
- %.pre.i = load double, double* %182, align 4
- %.pre22.i = load double, double* %184, align 4
+ %268 = inttoptr i32 %267 to ptr
+ %269 = getelementptr %struct.edge_rec, ptr %268, i32 0, i32 1
+ %270 = load ptr, ptr %269, align 4
+ %271 = getelementptr %struct.edge_rec, ptr %260, i32 0, i32 1
+ %272 = load ptr, ptr %271, align 4
+ store ptr %270, ptr %271, align 4
+ store ptr %272, ptr %269, align 4
+ %273 = load ptr, ptr %253, align 4
+ %274 = load ptr, ptr %261, align 4
+ store ptr %273, ptr %261, align 4
+ store ptr %274, ptr %253, align 4
+ %275 = inttoptr i32 %206 to ptr
+ %276 = getelementptr %struct.edge_rec, ptr %275, i32 0, i32 1
+ store ptr %avail_edge.tmp.026, ptr %276, align 4
+ %277 = getelementptr %struct.edge_rec, ptr %t.0.i, i32 0, i32 1
+ %278 = load ptr, ptr %277, align 4
+ %.pre.i = load double, ptr %182, align 4
+ %.pre22.i = load double, ptr %184, align 4
br label %bb12.i
bb12.i:
- %avail_edge.tmp.026 = phi %struct.edge_rec* [ %avail_edge.promoted25, %bb10.i ], [ %275, %bb11.i ]
+ %avail_edge.tmp.026 = phi ptr [ %avail_edge.promoted25, %bb10.i ], [ %275, %bb11.i ]
%279 = phi double [ %.pre22.i, %bb11.i ], [ %185, %bb10.i ]
%280 = phi double [ %.pre.i, %bb11.i ], [ %183, %bb10.i ]
- %lcand.0.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ]
- %t.0.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ]
- %.pn5.in.in.in.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ]
- %.pn4.in.in.in.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ]
- %lcand.2.pn.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ]
- %.pn5.in.in.i = ptrtoint %struct.edge_rec* %.pn5.in.in.in.i to i32
- %.pn4.in.in.i = ptrtoint %struct.edge_rec* %.pn4.in.in.in.i to i32
+ %lcand.0.i = phi ptr [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ]
+ %t.0.i = phi ptr [ %169, %bb10.i ], [ %278, %bb11.i ]
+ %.pn5.in.in.in.i = phi ptr [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ]
+ %.pn4.in.in.in.i = phi ptr [ %169, %bb10.i ], [ %278, %bb11.i ]
+ %lcand.2.pn.i = phi ptr [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ]
+ %.pn5.in.in.i = ptrtoint ptr %.pn5.in.in.in.i to i32
+ %.pn4.in.in.i = ptrtoint ptr %.pn4.in.in.in.i to i32
%.pn5.in.i = xor i32 %.pn5.in.in.i, 32
%.pn4.in.i = xor i32 %.pn4.in.in.i, 32
- %.pn5.i = inttoptr i32 %.pn5.in.i to %struct.edge_rec*
- %.pn4.i = inttoptr i32 %.pn4.in.i to %struct.edge_rec*
- %v1.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn5.i, i32 0, i32 0
- %v2.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn4.i, i32 0, i32 0
- %v3.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0
- %v1.0.i = load %struct.VERTEX*, %struct.VERTEX** %v1.0.in.i
- %v2.0.i = load %struct.VERTEX*, %struct.VERTEX** %v2.0.in.i
- %v3.0.i = load %struct.VERTEX*, %struct.VERTEX** %v3.0.in.i
- %281 = load double, double* %202, align 4
- %282 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0
- %283 = load double, double* %282, align 4
+ %.pn5.i = inttoptr i32 %.pn5.in.i to ptr
+ %.pn4.i = inttoptr i32 %.pn4.in.i to ptr
+ %v1.0.in.i = getelementptr %struct.edge_rec, ptr %.pn5.i, i32 0, i32 0
+ %v2.0.in.i = getelementptr %struct.edge_rec, ptr %.pn4.i, i32 0, i32 0
+ %v3.0.in.i = getelementptr %struct.edge_rec, ptr %lcand.2.pn.i, i32 0, i32 0
+ %v1.0.i = load ptr, ptr %v1.0.in.i
+ %v2.0.i = load ptr, ptr %v2.0.in.i
+ %v3.0.i = load ptr, ptr %v3.0.in.i
+ %281 = load double, ptr %202, align 4
+ %282 = getelementptr %struct.VERTEX, ptr %v1.0.i, i32 0, i32 0, i32 0
+ %283 = load double, ptr %282, align 4
%284 = fsub double %283, %280
- %285 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1
- %286 = load double, double* %285, align 4
+ %285 = getelementptr %struct.VERTEX, ptr %v1.0.i, i32 0, i32 0, i32 1
+ %286 = load double, ptr %285, align 4
%287 = fsub double %286, %279
- %288 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2
- %289 = load double, double* %288, align 4
- %290 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0
- %291 = load double, double* %290, align 4
+ %288 = getelementptr %struct.VERTEX, ptr %v1.0.i, i32 0, i32 0, i32 2
+ %289 = load double, ptr %288, align 4
+ %290 = getelementptr %struct.VERTEX, ptr %v2.0.i, i32 0, i32 0, i32 0
+ %291 = load double, ptr %290, align 4
%292 = fsub double %291, %280
- %293 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1
- %294 = load double, double* %293, align 4
+ %293 = getelementptr %struct.VERTEX, ptr %v2.0.i, i32 0, i32 0, i32 1
+ %294 = load double, ptr %293, align 4
%295 = fsub double %294, %279
- %296 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2
- %297 = load double, double* %296, align 4
- %298 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0
- %299 = load double, double* %298, align 4
+ %296 = getelementptr %struct.VERTEX, ptr %v2.0.i, i32 0, i32 0, i32 2
+ %297 = load double, ptr %296, align 4
+ %298 = getelementptr %struct.VERTEX, ptr %v3.0.i, i32 0, i32 0, i32 0
+ %299 = load double, ptr %298, align 4
%300 = fsub double %299, %280
- %301 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1
- %302 = load double, double* %301, align 4
+ %301 = getelementptr %struct.VERTEX, ptr %v3.0.i, i32 0, i32 0, i32 1
+ %302 = load double, ptr %301, align 4
%303 = fsub double %302, %279
- %304 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2
- %305 = load double, double* %304, align 4
+ %304 = getelementptr %struct.VERTEX, ptr %v3.0.i, i32 0, i32 0, i32 2
+ %305 = load double, ptr %304, align 4
%306 = fsub double %289, %281
%307 = fmul double %292, %303
%308 = fmul double %295, %300
@@ -441,44 +441,44 @@ bb12.i:
br i1 %323, label %bb11.i, label %bb13.loopexit.i
bb13.loopexit.i:
- store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** @avail_edge
- %.pre23.i = load %struct.VERTEX*, %struct.VERTEX** %170, align 4
- %.pre24.i = load %struct.VERTEX*, %struct.VERTEX** %175, align 4
+ store ptr %avail_edge.tmp.026, ptr @avail_edge
+ %.pre23.i = load ptr, ptr %170, align 4
+ %.pre24.i = load ptr, ptr %175, align 4
br label %bb13.i
bb13.i:
- %324 = phi %struct.VERTEX* [ %.pre24.i, %bb13.loopexit.i ], [ %176, %bb9.i ]
- %325 = phi %struct.VERTEX* [ %.pre23.i, %bb13.loopexit.i ], [ %171, %bb9.i ]
- %lcand.1.i = phi %struct.edge_rec* [ %lcand.0.i, %bb13.loopexit.i ], [ %lcand.2.i, %bb9.i ]
- %326 = ptrtoint %struct.edge_rec* %rcand.2.i to i32
+ %324 = phi ptr [ %.pre24.i, %bb13.loopexit.i ], [ %176, %bb9.i ]
+ %325 = phi ptr [ %.pre23.i, %bb13.loopexit.i ], [ %171, %bb9.i ]
+ %lcand.1.i = phi ptr [ %lcand.0.i, %bb13.loopexit.i ], [ %lcand.2.i, %bb9.i ]
+ %326 = ptrtoint ptr %rcand.2.i to i32
%327 = add i32 %326, 16
%328 = and i32 %327, 63
%329 = and i32 %326, -64
%330 = or i32 %328, %329
- %331 = inttoptr i32 %330 to %struct.edge_rec*
- %332 = getelementptr %struct.edge_rec, %struct.edge_rec* %331, i32 0, i32 1
- %333 = load %struct.edge_rec*, %struct.edge_rec** %332, align 4
- %334 = ptrtoint %struct.edge_rec* %333 to i32
+ %331 = inttoptr i32 %330 to ptr
+ %332 = getelementptr %struct.edge_rec, ptr %331, i32 0, i32 1
+ %333 = load ptr, ptr %332, align 4
+ %334 = ptrtoint ptr %333 to i32
%335 = add i32 %334, 16
%336 = and i32 %335, 63
%337 = and i32 %334, -64
%338 = or i32 %336, %337
%339 = xor i32 %338, 32
- %340 = inttoptr i32 %339 to %struct.edge_rec*
- %341 = getelementptr %struct.edge_rec, %struct.edge_rec* %340, i32 0, i32 0
- %342 = load %struct.VERTEX*, %struct.VERTEX** %341, align 4
- %343 = getelementptr %struct.VERTEX, %struct.VERTEX* %325, i32 0, i32 0, i32 0
- %344 = load double, double* %343, align 4
- %345 = getelementptr %struct.VERTEX, %struct.VERTEX* %325, i32 0, i32 0, i32 1
- %346 = load double, double* %345, align 4
- %347 = getelementptr %struct.VERTEX, %struct.VERTEX* %342, i32 0, i32 0, i32 0
- %348 = load double, double* %347, align 4
- %349 = getelementptr %struct.VERTEX, %struct.VERTEX* %342, i32 0, i32 0, i32 1
- %350 = load double, double* %349, align 4
- %351 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 0
- %352 = load double, double* %351, align 4
- %353 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 1
- %354 = load double, double* %353, align 4
+ %340 = inttoptr i32 %339 to ptr
+ %341 = getelementptr %struct.edge_rec, ptr %340, i32 0, i32 0
+ %342 = load ptr, ptr %341, align 4
+ %343 = getelementptr %struct.VERTEX, ptr %325, i32 0, i32 0, i32 0
+ %344 = load double, ptr %343, align 4
+ %345 = getelementptr %struct.VERTEX, ptr %325, i32 0, i32 0, i32 1
+ %346 = load double, ptr %345, align 4
+ %347 = getelementptr %struct.VERTEX, ptr %342, i32 0, i32 0, i32 0
+ %348 = load double, ptr %347, align 4
+ %349 = getelementptr %struct.VERTEX, ptr %342, i32 0, i32 0, i32 1
+ %350 = load double, ptr %349, align 4
+ %351 = getelementptr %struct.VERTEX, ptr %324, i32 0, i32 0, i32 0
+ %352 = load double, ptr %351, align 4
+ %353 = getelementptr %struct.VERTEX, ptr %324, i32 0, i32 0, i32 1
+ %354 = load double, ptr %353, align 4
%355 = fsub double %344, %352
%356 = fsub double %350, %354
%357 = fmul double %355, %356
@@ -490,156 +490,156 @@ bb13.i:
br i1 %362, label %bb14.i, label %bb17.i
bb14.i:
- %363 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 2
- %avail_edge.promoted = load %struct.edge_rec*, %struct.edge_rec** @avail_edge
+ %363 = getelementptr %struct.VERTEX, ptr %324, i32 0, i32 0, i32 2
+ %avail_edge.promoted = load ptr, ptr @avail_edge
br label %bb16.i
bb15.i:
- %364 = ptrtoint %struct.edge_rec* %rcand.0.i to i32
+ %364 = ptrtoint ptr %rcand.0.i to i32
%365 = add i32 %364, 16
%366 = and i32 %365, 63
%367 = and i32 %364, -64
%368 = or i32 %366, %367
- %369 = inttoptr i32 %368 to %struct.edge_rec*
- %370 = getelementptr %struct.edge_rec, %struct.edge_rec* %369, i32 0, i32 1
- %371 = load %struct.edge_rec*, %struct.edge_rec** %370, align 4
- %372 = ptrtoint %struct.edge_rec* %371 to i32
+ %369 = inttoptr i32 %368 to ptr
+ %370 = getelementptr %struct.edge_rec, ptr %369, i32 0, i32 1
+ %371 = load ptr, ptr %370, align 4
+ %372 = ptrtoint ptr %371 to i32
%373 = add i32 %372, 16
%374 = and i32 %373, 63
%375 = and i32 %372, -64
%376 = or i32 %374, %375
- %377 = inttoptr i32 %376 to %struct.edge_rec*
- %378 = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.0.i, i32 0, i32 1
- %379 = load %struct.edge_rec*, %struct.edge_rec** %378, align 4
- %380 = ptrtoint %struct.edge_rec* %379 to i32
+ %377 = inttoptr i32 %376 to ptr
+ %378 = getelementptr %struct.edge_rec, ptr %rcand.0.i, i32 0, i32 1
+ %379 = load ptr, ptr %378, align 4
+ %380 = ptrtoint ptr %379 to i32
%381 = add i32 %380, 16
%382 = and i32 %381, 63
%383 = and i32 %380, -64
%384 = or i32 %382, %383
- %385 = inttoptr i32 %384 to %struct.edge_rec*
- %386 = getelementptr %struct.edge_rec, %struct.edge_rec* %377, i32 0, i32 1
- %387 = load %struct.edge_rec*, %struct.edge_rec** %386, align 4
- %388 = ptrtoint %struct.edge_rec* %387 to i32
+ %385 = inttoptr i32 %384 to ptr
+ %386 = getelementptr %struct.edge_rec, ptr %377, i32 0, i32 1
+ %387 = load ptr, ptr %386, align 4
+ %388 = ptrtoint ptr %387 to i32
%389 = add i32 %388, 16
%390 = and i32 %389, 63
%391 = and i32 %388, -64
%392 = or i32 %390, %391
- %393 = inttoptr i32 %392 to %struct.edge_rec*
- %394 = getelementptr %struct.edge_rec, %struct.edge_rec* %393, i32 0, i32 1
- %395 = load %struct.edge_rec*, %struct.edge_rec** %394, align 4
- %396 = getelementptr %struct.edge_rec, %struct.edge_rec* %385, i32 0, i32 1
- %397 = load %struct.edge_rec*, %struct.edge_rec** %396, align 4
- store %struct.edge_rec* %395, %struct.edge_rec** %396, align 4
- store %struct.edge_rec* %397, %struct.edge_rec** %394, align 4
- %398 = load %struct.edge_rec*, %struct.edge_rec** %378, align 4
- %399 = load %struct.edge_rec*, %struct.edge_rec** %386, align 4
- store %struct.edge_rec* %398, %struct.edge_rec** %386, align 4
- store %struct.edge_rec* %399, %struct.edge_rec** %378, align 4
+ %393 = inttoptr i32 %392 to ptr
+ %394 = getelementptr %struct.edge_rec, ptr %393, i32 0, i32 1
+ %395 = load ptr, ptr %394, align 4
+ %396 = getelementptr %struct.edge_rec, ptr %385, i32 0, i32 1
+ %397 = load ptr, ptr %396, align 4
+ store ptr %395, ptr %396, align 4
+ store ptr %397, ptr %394, align 4
+ %398 = load ptr, ptr %378, align 4
+ %399 = load ptr, ptr %386, align 4
+ store ptr %398, ptr %386, align 4
+ store ptr %399, ptr %378, align 4
%400 = xor i32 %364, 32
%401 = add i32 %400, 16
%402 = and i32 %401, 63
%403 = or i32 %402, %367
- %404 = inttoptr i32 %403 to %struct.edge_rec*
- %405 = getelementptr %struct.edge_rec, %struct.edge_rec* %404, i32 0, i32 1
- %406 = load %struct.edge_rec*, %struct.edge_rec** %405, align 4
- %407 = ptrtoint %struct.edge_rec* %406 to i32
+ %404 = inttoptr i32 %403 to ptr
+ %405 = getelementptr %struct.edge_rec, ptr %404, i32 0, i32 1
+ %406 = load ptr, ptr %405, align 4
+ %407 = ptrtoint ptr %406 to i32
%408 = add i32 %407, 16
%409 = and i32 %408, 63
%410 = and i32 %407, -64
%411 = or i32 %409, %410
- %412 = inttoptr i32 %411 to %struct.edge_rec*
- %413 = inttoptr i32 %400 to %struct.edge_rec*
- %414 = getelementptr %struct.edge_rec, %struct.edge_rec* %413, i32 0, i32 1
- %415 = load %struct.edge_rec*, %struct.edge_rec** %414, align 4
- %416 = ptrtoint %struct.edge_rec* %415 to i32
+ %412 = inttoptr i32 %411 to ptr
+ %413 = inttoptr i32 %400 to ptr
+ %414 = getelementptr %struct.edge_rec, ptr %413, i32 0, i32 1
+ %415 = load ptr, ptr %414, align 4
+ %416 = ptrtoint ptr %415 to i32
%417 = add i32 %416, 16
%418 = and i32 %417, 63
%419 = and i32 %416, -64
%420 = or i32 %418, %419
- %421 = inttoptr i32 %420 to %struct.edge_rec*
- %422 = getelementptr %struct.edge_rec, %struct.edge_rec* %412, i32 0, i32 1
- %423 = load %struct.edge_rec*, %struct.edge_rec** %422, align 4
- %424 = ptrtoint %struct.edge_rec* %423 to i32
+ %421 = inttoptr i32 %420 to ptr
+ %422 = getelementptr %struct.edge_rec, ptr %412, i32 0, i32 1
+ %423 = load ptr, ptr %422, align 4
+ %424 = ptrtoint ptr %423 to i32
%425 = add i32 %424, 16
%426 = and i32 %425, 63
%427 = and i32 %424, -64
%428 = or i32 %426, %427
- %429 = inttoptr i32 %428 to %struct.edge_rec*
- %430 = getelementptr %struct.edge_rec, %struct.edge_rec* %429, i32 0, i32 1
- %431 = load %struct.edge_rec*, %struct.edge_rec** %430, align 4
- %432 = getelementptr %struct.edge_rec, %struct.edge_rec* %421, i32 0, i32 1
- %433 = load %struct.edge_rec*, %struct.edge_rec** %432, align 4
- store %struct.edge_rec* %431, %struct.edge_rec** %432, align 4
- store %struct.edge_rec* %433, %struct.edge_rec** %430, align 4
- %434 = load %struct.edge_rec*, %struct.edge_rec** %414, align 4
- %435 = load %struct.edge_rec*, %struct.edge_rec** %422, align 4
- store %struct.edge_rec* %434, %struct.edge_rec** %422, align 4
- store %struct.edge_rec* %435, %struct.edge_rec** %414, align 4
- %436 = inttoptr i32 %367 to %struct.edge_rec*
- %437 = getelementptr %struct.edge_rec, %struct.edge_rec* %436, i32 0, i32 1
- store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** %437, align 4
+ %429 = inttoptr i32 %428 to ptr
+ %430 = getelementptr %struct.edge_rec, ptr %429, i32 0, i32 1
+ %431 = load ptr, ptr %430, align 4
+ %432 = getelementptr %struct.edge_rec, ptr %421, i32 0, i32 1
+ %433 = load ptr, ptr %432, align 4
+ store ptr %431, ptr %432, align 4
+ store ptr %433, ptr %430, align 4
+ %434 = load ptr, ptr %414, align 4
+ %435 = load ptr, ptr %422, align 4
+ store ptr %434, ptr %422, align 4
+ store ptr %435, ptr %414, align 4
+ %436 = inttoptr i32 %367 to ptr
+ %437 = getelementptr %struct.edge_rec, ptr %436, i32 0, i32 1
+ store ptr %avail_edge.tmp.0, ptr %437, align 4
%438 = add i32 %t.1.in.i, 16
%439 = and i32 %438, 63
%440 = and i32 %t.1.in.i, -64
%441 = or i32 %439, %440
- %442 = inttoptr i32 %441 to %struct.edge_rec*
- %443 = getelementptr %struct.edge_rec, %struct.edge_rec* %442, i32 0, i32 1
- %444 = load %struct.edge_rec*, %struct.edge_rec** %443, align 4
- %445 = ptrtoint %struct.edge_rec* %444 to i32
+ %442 = inttoptr i32 %441 to ptr
+ %443 = getelementptr %struct.edge_rec, ptr %442, i32 0, i32 1
+ %444 = load ptr, ptr %443, align 4
+ %445 = ptrtoint ptr %444 to i32
%446 = add i32 %445, 16
%447 = and i32 %446, 63
%448 = and i32 %445, -64
%449 = or i32 %447, %448
- %.pre25.i = load double, double* %351, align 4
- %.pre26.i = load double, double* %353, align 4
+ %.pre25.i = load double, ptr %351, align 4
+ %.pre26.i = load double, ptr %353, align 4
br label %bb16.i
bb16.i:
- %avail_edge.tmp.0 = phi %struct.edge_rec* [ %avail_edge.promoted, %bb14.i ], [ %436, %bb15.i ]
+ %avail_edge.tmp.0 = phi ptr [ %avail_edge.promoted, %bb14.i ], [ %436, %bb15.i ]
%450 = phi double [ %.pre26.i, %bb15.i ], [ %354, %bb14.i ]
%451 = phi double [ %.pre25.i, %bb15.i ], [ %352, %bb14.i ]
- %rcand.0.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ]
+ %rcand.0.i = phi ptr [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ]
%t.1.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ]
%.pn3.in.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ]
- %.pn.in.in.in.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ]
- %rcand.2.pn.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ]
- %t.1.i = inttoptr i32 %t.1.in.i to %struct.edge_rec*
- %.pn.in.in.i = ptrtoint %struct.edge_rec* %.pn.in.in.in.i to i32
+ %.pn.in.in.in.i = phi ptr [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ]
+ %rcand.2.pn.i = phi ptr [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ]
+ %t.1.i = inttoptr i32 %t.1.in.i to ptr
+ %.pn.in.in.i = ptrtoint ptr %.pn.in.in.in.i to i32
%.pn3.in.i = xor i32 %.pn3.in.in.i, 32
%.pn.in.i = xor i32 %.pn.in.in.i, 32
- %.pn3.i = inttoptr i32 %.pn3.in.i to %struct.edge_rec*
- %.pn.i = inttoptr i32 %.pn.in.i to %struct.edge_rec*
- %v1.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn3.i, i32 0, i32 0
- %v2.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn.i, i32 0, i32 0
- %v3.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0
- %v1.1.i = load %struct.VERTEX*, %struct.VERTEX** %v1.1.in.i
- %v2.1.i = load %struct.VERTEX*, %struct.VERTEX** %v2.1.in.i
- %v3.1.i = load %struct.VERTEX*, %struct.VERTEX** %v3.1.in.i
- %452 = load double, double* %363, align 4
- %453 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0
- %454 = load double, double* %453, align 4
+ %.pn3.i = inttoptr i32 %.pn3.in.i to ptr
+ %.pn.i = inttoptr i32 %.pn.in.i to ptr
+ %v1.1.in.i = getelementptr %struct.edge_rec, ptr %.pn3.i, i32 0, i32 0
+ %v2.1.in.i = getelementptr %struct.edge_rec, ptr %.pn.i, i32 0, i32 0
+ %v3.1.in.i = getelementptr %struct.edge_rec, ptr %rcand.2.pn.i, i32 0, i32 0
+ %v1.1.i = load ptr, ptr %v1.1.in.i
+ %v2.1.i = load ptr, ptr %v2.1.in.i
+ %v3.1.i = load ptr, ptr %v3.1.in.i
+ %452 = load double, ptr %363, align 4
+ %453 = getelementptr %struct.VERTEX, ptr %v1.1.i, i32 0, i32 0, i32 0
+ %454 = load double, ptr %453, align 4
%455 = fsub double %454, %451
- %456 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1
- %457 = load double, double* %456, align 4
+ %456 = getelementptr %struct.VERTEX, ptr %v1.1.i, i32 0, i32 0, i32 1
+ %457 = load double, ptr %456, align 4
%458 = fsub double %457, %450
- %459 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2
- %460 = load double, double* %459, align 4
- %461 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0
- %462 = load double, double* %461, align 4
+ %459 = getelementptr %struct.VERTEX, ptr %v1.1.i, i32 0, i32 0, i32 2
+ %460 = load double, ptr %459, align 4
+ %461 = getelementptr %struct.VERTEX, ptr %v2.1.i, i32 0, i32 0, i32 0
+ %462 = load double, ptr %461, align 4
%463 = fsub double %462, %451
- %464 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1
- %465 = load double, double* %464, align 4
+ %464 = getelementptr %struct.VERTEX, ptr %v2.1.i, i32 0, i32 0, i32 1
+ %465 = load double, ptr %464, align 4
%466 = fsub double %465, %450
- %467 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2
- %468 = load double, double* %467, align 4
- %469 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0
- %470 = load double, double* %469, align 4
+ %467 = getelementptr %struct.VERTEX, ptr %v2.1.i, i32 0, i32 0, i32 2
+ %468 = load double, ptr %467, align 4
+ %469 = getelementptr %struct.VERTEX, ptr %v3.1.i, i32 0, i32 0, i32 0
+ %470 = load double, ptr %469, align 4
%471 = fsub double %470, %451
- %472 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1
- %473 = load double, double* %472, align 4
+ %472 = getelementptr %struct.VERTEX, ptr %v3.1.i, i32 0, i32 0, i32 1
+ %473 = load double, ptr %472, align 4
%474 = fsub double %473, %450
- %475 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 2
- %476 = load double, double* %475, align 4
+ %475 = getelementptr %struct.VERTEX, ptr %v3.1.i, i32 0, i32 0, i32 2
+ %476 = load double, ptr %475, align 4
%477 = fsub double %460, %452
%478 = fmul double %463, %474
%479 = fmul double %466, %471
@@ -661,32 +661,32 @@ bb16.i:
br i1 %494, label %bb15.i, label %bb17.loopexit.i
bb17.loopexit.i:
- store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** @avail_edge
- %.pre27.i = load %struct.VERTEX*, %struct.VERTEX** %170, align 4
- %.pre28.i = load %struct.VERTEX*, %struct.VERTEX** %175, align 4
+ store ptr %avail_edge.tmp.0, ptr @avail_edge
+ %.pre27.i = load ptr, ptr %170, align 4
+ %.pre28.i = load ptr, ptr %175, align 4
br label %bb17.i
bb17.i:
- %495 = phi %struct.VERTEX* [ %.pre28.i, %bb17.loopexit.i ], [ %324, %bb13.i ]
- %496 = phi %struct.VERTEX* [ %.pre27.i, %bb17.loopexit.i ], [ %325, %bb13.i ]
- %rcand.1.i = phi %struct.edge_rec* [ %rcand.0.i, %bb17.loopexit.i ], [ %rcand.2.i, %bb13.i ]
- %497 = ptrtoint %struct.edge_rec* %lcand.1.i to i32
+ %495 = phi ptr [ %.pre28.i, %bb17.loopexit.i ], [ %324, %bb13.i ]
+ %496 = phi ptr [ %.pre27.i, %bb17.loopexit.i ], [ %325, %bb13.i ]
+ %rcand.1.i = phi ptr [ %rcand.0.i, %bb17.loopexit.i ], [ %rcand.2.i, %bb13.i ]
+ %497 = ptrtoint ptr %lcand.1.i to i32
%498 = xor i32 %497, 32
- %499 = inttoptr i32 %498 to %struct.edge_rec*
- %500 = getelementptr %struct.edge_rec, %struct.edge_rec* %499, i32 0, i32 0
- %501 = load %struct.VERTEX*, %struct.VERTEX** %500, align 4
- %502 = getelementptr %struct.VERTEX, %struct.VERTEX* %496, i32 0, i32 0, i32 0
- %503 = load double, double* %502, align 4
- %504 = getelementptr %struct.VERTEX, %struct.VERTEX* %496, i32 0, i32 0, i32 1
- %505 = load double, double* %504, align 4
- %506 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 0
- %507 = load double, double* %506, align 4
- %508 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 1
- %509 = load double, double* %508, align 4
- %510 = getelementptr %struct.VERTEX, %struct.VERTEX* %495, i32 0, i32 0, i32 0
- %511 = load double, double* %510, align 4
- %512 = getelementptr %struct.VERTEX, %struct.VERTEX* %495, i32 0, i32 0, i32 1
- %513 = load double, double* %512, align 4
+ %499 = inttoptr i32 %498 to ptr
+ %500 = getelementptr %struct.edge_rec, ptr %499, i32 0, i32 0
+ %501 = load ptr, ptr %500, align 4
+ %502 = getelementptr %struct.VERTEX, ptr %496, i32 0, i32 0, i32 0
+ %503 = load double, ptr %502, align 4
+ %504 = getelementptr %struct.VERTEX, ptr %496, i32 0, i32 0, i32 1
+ %505 = load double, ptr %504, align 4
+ %506 = getelementptr %struct.VERTEX, ptr %501, i32 0, i32 0, i32 0
+ %507 = load double, ptr %506, align 4
+ %508 = getelementptr %struct.VERTEX, ptr %501, i32 0, i32 0, i32 1
+ %509 = load double, ptr %508, align 4
+ %510 = getelementptr %struct.VERTEX, ptr %495, i32 0, i32 0, i32 0
+ %511 = load double, ptr %510, align 4
+ %512 = getelementptr %struct.VERTEX, ptr %495, i32 0, i32 0, i32 1
+ %513 = load double, ptr %512, align 4
%514 = fsub double %503, %511
%515 = fsub double %509, %513
%516 = fmul double %514, %515
@@ -695,15 +695,15 @@ bb17.i:
%519 = fmul double %517, %518
%520 = fsub double %516, %519
%521 = fcmp ogt double %520, 0.000000e+00
- %522 = ptrtoint %struct.edge_rec* %rcand.1.i to i32
+ %522 = ptrtoint ptr %rcand.1.i to i32
%523 = xor i32 %522, 32
- %524 = inttoptr i32 %523 to %struct.edge_rec*
- %525 = getelementptr %struct.edge_rec, %struct.edge_rec* %524, i32 0, i32 0
- %526 = load %struct.VERTEX*, %struct.VERTEX** %525, align 4
- %527 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 0
- %528 = load double, double* %527, align 4
- %529 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 1
- %530 = load double, double* %529, align 4
+ %524 = inttoptr i32 %523 to ptr
+ %525 = getelementptr %struct.edge_rec, ptr %524, i32 0, i32 0
+ %526 = load ptr, ptr %525, align 4
+ %527 = getelementptr %struct.VERTEX, ptr %526, i32 0, i32 0, i32 0
+ %528 = load double, ptr %527, align 4
+ %529 = getelementptr %struct.VERTEX, ptr %526, i32 0, i32 0, i32 1
+ %530 = load double, ptr %529, align 4
%531 = fsub double %530, %513
%532 = fmul double %514, %531
%533 = fsub double %528, %511
@@ -714,38 +714,38 @@ bb17.i:
br i1 %537, label %bb21.i, label %do_merge.exit
bb21.i:
- %538 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.1.i, i32 0, i32 0
- %539 = load %struct.VERTEX*, %struct.VERTEX** %538, align 4
- %540 = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.1.i, i32 0, i32 0
- %541 = load %struct.VERTEX*, %struct.VERTEX** %540, align 4
+ %538 = getelementptr %struct.edge_rec, ptr %lcand.1.i, i32 0, i32 0
+ %539 = load ptr, ptr %538, align 4
+ %540 = getelementptr %struct.edge_rec, ptr %rcand.1.i, i32 0, i32 0
+ %541 = load ptr, ptr %540, align 4
br i1 %521, label %bb22.i, label %bb24.i
bb22.i:
br i1 %536, label %bb23.i, label %bb25.i
bb23.i:
- %542 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 2
- %543 = load double, double* %542, align 4
+ %542 = getelementptr %struct.VERTEX, ptr %526, i32 0, i32 0, i32 2
+ %543 = load double, ptr %542, align 4
%544 = fsub double %507, %528
%545 = fsub double %509, %530
- %546 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 2
- %547 = load double, double* %546, align 4
- %548 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 0
- %549 = load double, double* %548, align 4
+ %546 = getelementptr %struct.VERTEX, ptr %501, i32 0, i32 0, i32 2
+ %547 = load double, ptr %546, align 4
+ %548 = getelementptr %struct.VERTEX, ptr %539, i32 0, i32 0, i32 0
+ %549 = load double, ptr %548, align 4
%550 = fsub double %549, %528
- %551 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 1
- %552 = load double, double* %551, align 4
+ %551 = getelementptr %struct.VERTEX, ptr %539, i32 0, i32 0, i32 1
+ %552 = load double, ptr %551, align 4
%553 = fsub double %552, %530
- %554 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 2
- %555 = load double, double* %554, align 4
- %556 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 0
- %557 = load double, double* %556, align 4
+ %554 = getelementptr %struct.VERTEX, ptr %539, i32 0, i32 0, i32 2
+ %555 = load double, ptr %554, align 4
+ %556 = getelementptr %struct.VERTEX, ptr %541, i32 0, i32 0, i32 0
+ %557 = load double, ptr %556, align 4
%558 = fsub double %557, %528
- %559 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 1
- %560 = load double, double* %559, align 4
+ %559 = getelementptr %struct.VERTEX, ptr %541, i32 0, i32 0, i32 1
+ %560 = load double, ptr %559, align 4
%561 = fsub double %560, %530
- %562 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 2
- %563 = load double, double* %562, align 4
+ %562 = getelementptr %struct.VERTEX, ptr %541, i32 0, i32 0, i32 2
+ %563 = load double, ptr %562, align 4
%564 = fsub double %547, %543
%565 = fmul double %550, %561
%566 = fmul double %553, %558
@@ -771,101 +771,101 @@ bb24.i:
%583 = and i32 %582, 63
%584 = and i32 %522, -64
%585 = or i32 %583, %584
- %586 = inttoptr i32 %585 to %struct.edge_rec*
- %587 = getelementptr %struct.edge_rec, %struct.edge_rec* %586, i32 0, i32 1
- %588 = load %struct.edge_rec*, %struct.edge_rec** %587, align 4
- %589 = ptrtoint %struct.edge_rec* %588 to i32
+ %586 = inttoptr i32 %585 to ptr
+ %587 = getelementptr %struct.edge_rec, ptr %586, i32 0, i32 1
+ %588 = load ptr, ptr %587, align 4
+ %589 = ptrtoint ptr %588 to i32
%590 = add i32 %589, 16
%591 = and i32 %590, 63
%592 = and i32 %589, -64
%593 = or i32 %591, %592
- %594 = inttoptr i32 %593 to %struct.edge_rec*
- %595 = call %struct.edge_rec* @alloc_edge() nounwind
- %596 = getelementptr %struct.edge_rec, %struct.edge_rec* %595, i32 0, i32 1
- store %struct.edge_rec* %595, %struct.edge_rec** %596, align 4
- %597 = getelementptr %struct.edge_rec, %struct.edge_rec* %595, i32 0, i32 0
- store %struct.VERTEX* %526, %struct.VERTEX** %597, align 4
- %598 = ptrtoint %struct.edge_rec* %595 to i32
+ %594 = inttoptr i32 %593 to ptr
+ %595 = call ptr @alloc_edge() nounwind
+ %596 = getelementptr %struct.edge_rec, ptr %595, i32 0, i32 1
+ store ptr %595, ptr %596, align 4
+ %597 = getelementptr %struct.edge_rec, ptr %595, i32 0, i32 0
+ store ptr %526, ptr %597, align 4
+ %598 = ptrtoint ptr %595 to i32
%599 = add i32 %598, 16
- %600 = inttoptr i32 %599 to %struct.edge_rec*
+ %600 = inttoptr i32 %599 to ptr
%601 = add i32 %598, 48
- %602 = inttoptr i32 %601 to %struct.edge_rec*
- %603 = getelementptr %struct.edge_rec, %struct.edge_rec* %600, i32 0, i32 1
- store %struct.edge_rec* %602, %struct.edge_rec** %603, align 4
+ %602 = inttoptr i32 %601 to ptr
+ %603 = getelementptr %struct.edge_rec, ptr %600, i32 0, i32 1
+ store ptr %602, ptr %603, align 4
%604 = add i32 %598, 32
- %605 = inttoptr i32 %604 to %struct.edge_rec*
- %606 = getelementptr %struct.edge_rec, %struct.edge_rec* %605, i32 0, i32 1
- store %struct.edge_rec* %605, %struct.edge_rec** %606, align 4
- %607 = getelementptr %struct.edge_rec, %struct.edge_rec* %605, i32 0, i32 0
- store %struct.VERTEX* %495, %struct.VERTEX** %607, align 4
- %608 = getelementptr %struct.edge_rec, %struct.edge_rec* %602, i32 0, i32 1
- store %struct.edge_rec* %600, %struct.edge_rec** %608, align 4
- %609 = load %struct.edge_rec*, %struct.edge_rec** %596, align 4
- %610 = ptrtoint %struct.edge_rec* %609 to i32
+ %605 = inttoptr i32 %604 to ptr
+ %606 = getelementptr %struct.edge_rec, ptr %605, i32 0, i32 1
+ store ptr %605, ptr %606, align 4
+ %607 = getelementptr %struct.edge_rec, ptr %605, i32 0, i32 0
+ store ptr %495, ptr %607, align 4
+ %608 = getelementptr %struct.edge_rec, ptr %602, i32 0, i32 1
+ store ptr %600, ptr %608, align 4
+ %609 = load ptr, ptr %596, align 4
+ %610 = ptrtoint ptr %609 to i32
%611 = add i32 %610, 16
%612 = and i32 %611, 63
%613 = and i32 %610, -64
%614 = or i32 %612, %613
- %615 = inttoptr i32 %614 to %struct.edge_rec*
- %616 = getelementptr %struct.edge_rec, %struct.edge_rec* %594, i32 0, i32 1
- %617 = load %struct.edge_rec*, %struct.edge_rec** %616, align 4
- %618 = ptrtoint %struct.edge_rec* %617 to i32
+ %615 = inttoptr i32 %614 to ptr
+ %616 = getelementptr %struct.edge_rec, ptr %594, i32 0, i32 1
+ %617 = load ptr, ptr %616, align 4
+ %618 = ptrtoint ptr %617 to i32
%619 = add i32 %618, 16
%620 = and i32 %619, 63
%621 = and i32 %618, -64
%622 = or i32 %620, %621
- %623 = inttoptr i32 %622 to %struct.edge_rec*
- %624 = getelementptr %struct.edge_rec, %struct.edge_rec* %623, i32 0, i32 1
- %625 = load %struct.edge_rec*, %struct.edge_rec** %624, align 4
- %626 = getelementptr %struct.edge_rec, %struct.edge_rec* %615, i32 0, i32 1
- %627 = load %struct.edge_rec*, %struct.edge_rec** %626, align 4
- store %struct.edge_rec* %625, %struct.edge_rec** %626, align 4
- store %struct.edge_rec* %627, %struct.edge_rec** %624, align 4
- %628 = load %struct.edge_rec*, %struct.edge_rec** %596, align 4
- %629 = load %struct.edge_rec*, %struct.edge_rec** %616, align 4
- store %struct.edge_rec* %628, %struct.edge_rec** %616, align 4
- store %struct.edge_rec* %629, %struct.edge_rec** %596, align 4
+ %623 = inttoptr i32 %622 to ptr
+ %624 = getelementptr %struct.edge_rec, ptr %623, i32 0, i32 1
+ %625 = load ptr, ptr %624, align 4
+ %626 = getelementptr %struct.edge_rec, ptr %615, i32 0, i32 1
+ %627 = load ptr, ptr %626, align 4
+ store ptr %625, ptr %626, align 4
+ store ptr %627, ptr %624, align 4
+ %628 = load ptr, ptr %596, align 4
+ %629 = load ptr, ptr %616, align 4
+ store ptr %628, ptr %616, align 4
+ store ptr %629, ptr %596, align 4
%630 = xor i32 %598, 32
- %631 = inttoptr i32 %630 to %struct.edge_rec*
- %632 = getelementptr %struct.edge_rec, %struct.edge_rec* %631, i32 0, i32 1
- %633 = load %struct.edge_rec*, %struct.edge_rec** %632, align 4
- %634 = ptrtoint %struct.edge_rec* %633 to i32
+ %631 = inttoptr i32 %630 to ptr
+ %632 = getelementptr %struct.edge_rec, ptr %631, i32 0, i32 1
+ %633 = load ptr, ptr %632, align 4
+ %634 = ptrtoint ptr %633 to i32
%635 = add i32 %634, 16
%636 = and i32 %635, 63
%637 = and i32 %634, -64
%638 = or i32 %636, %637
- %639 = inttoptr i32 %638 to %struct.edge_rec*
- %640 = getelementptr %struct.edge_rec, %struct.edge_rec* %174, i32 0, i32 1
- %641 = load %struct.edge_rec*, %struct.edge_rec** %640, align 4
- %642 = ptrtoint %struct.edge_rec* %641 to i32
+ %639 = inttoptr i32 %638 to ptr
+ %640 = getelementptr %struct.edge_rec, ptr %174, i32 0, i32 1
+ %641 = load ptr, ptr %640, align 4
+ %642 = ptrtoint ptr %641 to i32
%643 = add i32 %642, 16
%644 = and i32 %643, 63
%645 = and i32 %642, -64
%646 = or i32 %644, %645
- %647 = inttoptr i32 %646 to %struct.edge_rec*
- %648 = getelementptr %struct.edge_rec, %struct.edge_rec* %647, i32 0, i32 1
- %649 = load %struct.edge_rec*, %struct.edge_rec** %648, align 4
- %650 = getelementptr %struct.edge_rec, %struct.edge_rec* %639, i32 0, i32 1
- %651 = load %struct.edge_rec*, %struct.edge_rec** %650, align 4
- store %struct.edge_rec* %649, %struct.edge_rec** %650, align 4
- store %struct.edge_rec* %651, %struct.edge_rec** %648, align 4
- %652 = load %struct.edge_rec*, %struct.edge_rec** %632, align 4
- %653 = load %struct.edge_rec*, %struct.edge_rec** %640, align 4
- store %struct.edge_rec* %652, %struct.edge_rec** %640, align 4
- store %struct.edge_rec* %653, %struct.edge_rec** %632, align 4
+ %647 = inttoptr i32 %646 to ptr
+ %648 = getelementptr %struct.edge_rec, ptr %647, i32 0, i32 1
+ %649 = load ptr, ptr %648, align 4
+ %650 = getelementptr %struct.edge_rec, ptr %639, i32 0, i32 1
+ %651 = load ptr, ptr %650, align 4
+ store ptr %649, ptr %650, align 4
+ store ptr %651, ptr %648, align 4
+ %652 = load ptr, ptr %632, align 4
+ %653 = load ptr, ptr %640, align 4
+ store ptr %652, ptr %640, align 4
+ store ptr %653, ptr %632, align 4
%654 = add i32 %630, 48
%655 = and i32 %654, 63
%656 = and i32 %598, -64
%657 = or i32 %655, %656
- %658 = inttoptr i32 %657 to %struct.edge_rec*
- %659 = getelementptr %struct.edge_rec, %struct.edge_rec* %658, i32 0, i32 1
- %660 = load %struct.edge_rec*, %struct.edge_rec** %659, align 4
- %661 = ptrtoint %struct.edge_rec* %660 to i32
+ %658 = inttoptr i32 %657 to ptr
+ %659 = getelementptr %struct.edge_rec, ptr %658, i32 0, i32 1
+ %660 = load ptr, ptr %659, align 4
+ %661 = ptrtoint ptr %660 to i32
%662 = add i32 %661, 16
%663 = and i32 %662, 63
%664 = and i32 %661, -64
%665 = or i32 %663, %664
- %666 = inttoptr i32 %665 to %struct.edge_rec*
+ %666 = inttoptr i32 %665 to ptr
br label %bb9.i
bb25.i:
@@ -873,334 +873,334 @@ bb25.i:
%668 = and i32 %667, 63
%669 = and i32 %172, -64
%670 = or i32 %668, %669
- %671 = inttoptr i32 %670 to %struct.edge_rec*
- %672 = getelementptr %struct.edge_rec, %struct.edge_rec* %671, i32 0, i32 1
- %673 = load %struct.edge_rec*, %struct.edge_rec** %672, align 4
- %674 = ptrtoint %struct.edge_rec* %673 to i32
+ %671 = inttoptr i32 %670 to ptr
+ %672 = getelementptr %struct.edge_rec, ptr %671, i32 0, i32 1
+ %673 = load ptr, ptr %672, align 4
+ %674 = ptrtoint ptr %673 to i32
%675 = add i32 %674, 16
%676 = and i32 %675, 63
%677 = and i32 %674, -64
%678 = or i32 %676, %677
- %679 = inttoptr i32 %678 to %struct.edge_rec*
- %680 = call %struct.edge_rec* @alloc_edge() nounwind
- %681 = getelementptr %struct.edge_rec, %struct.edge_rec* %680, i32 0, i32 1
- store %struct.edge_rec* %680, %struct.edge_rec** %681, align 4
- %682 = getelementptr %struct.edge_rec, %struct.edge_rec* %680, i32 0, i32 0
- store %struct.VERTEX* %501, %struct.VERTEX** %682, align 4
- %683 = ptrtoint %struct.edge_rec* %680 to i32
+ %679 = inttoptr i32 %678 to ptr
+ %680 = call ptr @alloc_edge() nounwind
+ %681 = getelementptr %struct.edge_rec, ptr %680, i32 0, i32 1
+ store ptr %680, ptr %681, align 4
+ %682 = getelementptr %struct.edge_rec, ptr %680, i32 0, i32 0
+ store ptr %501, ptr %682, align 4
+ %683 = ptrtoint ptr %680 to i32
%684 = add i32 %683, 16
- %685 = inttoptr i32 %684 to %struct.edge_rec*
+ %685 = inttoptr i32 %684 to ptr
%686 = add i32 %683, 48
- %687 = inttoptr i32 %686 to %struct.edge_rec*
- %688 = getelementptr %struct.edge_rec, %struct.edge_rec* %685, i32 0, i32 1
- store %struct.edge_rec* %687, %struct.edge_rec** %688, align 4
+ %687 = inttoptr i32 %686 to ptr
+ %688 = getelementptr %struct.edge_rec, ptr %685, i32 0, i32 1
+ store ptr %687, ptr %688, align 4
%689 = add i32 %683, 32
- %690 = inttoptr i32 %689 to %struct.edge_rec*
- %691 = getelementptr %struct.edge_rec, %struct.edge_rec* %690, i32 0, i32 1
- store %struct.edge_rec* %690, %struct.edge_rec** %691, align 4
- %692 = getelementptr %struct.edge_rec, %struct.edge_rec* %690, i32 0, i32 0
- store %struct.VERTEX* %496, %struct.VERTEX** %692, align 4
- %693 = getelementptr %struct.edge_rec, %struct.edge_rec* %687, i32 0, i32 1
- store %struct.edge_rec* %685, %struct.edge_rec** %693, align 4
- %694 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4
- %695 = ptrtoint %struct.edge_rec* %694 to i32
+ %690 = inttoptr i32 %689 to ptr
+ %691 = getelementptr %struct.edge_rec, ptr %690, i32 0, i32 1
+ store ptr %690, ptr %691, align 4
+ %692 = getelementptr %struct.edge_rec, ptr %690, i32 0, i32 0
+ store ptr %496, ptr %692, align 4
+ %693 = getelementptr %struct.edge_rec, ptr %687, i32 0, i32 1
+ store ptr %685, ptr %693, align 4
+ %694 = load ptr, ptr %681, align 4
+ %695 = ptrtoint ptr %694 to i32
%696 = add i32 %695, 16
%697 = and i32 %696, 63
%698 = and i32 %695, -64
%699 = or i32 %697, %698
- %700 = inttoptr i32 %699 to %struct.edge_rec*
- %701 = getelementptr %struct.edge_rec, %struct.edge_rec* %499, i32 0, i32 1
- %702 = load %struct.edge_rec*, %struct.edge_rec** %701, align 4
- %703 = ptrtoint %struct.edge_rec* %702 to i32
+ %700 = inttoptr i32 %699 to ptr
+ %701 = getelementptr %struct.edge_rec, ptr %499, i32 0, i32 1
+ %702 = load ptr, ptr %701, align 4
+ %703 = ptrtoint ptr %702 to i32
%704 = add i32 %703, 16
%705 = and i32 %704, 63
%706 = and i32 %703, -64
%707 = or i32 %705, %706
- %708 = inttoptr i32 %707 to %struct.edge_rec*
- %709 = getelementptr %struct.edge_rec, %struct.edge_rec* %708, i32 0, i32 1
- %710 = load %struct.edge_rec*, %struct.edge_rec** %709, align 4
- %711 = getelementptr %struct.edge_rec, %struct.edge_rec* %700, i32 0, i32 1
- %712 = load %struct.edge_rec*, %struct.edge_rec** %711, align 4
- store %struct.edge_rec* %710, %struct.edge_rec** %711, align 4
- store %struct.edge_rec* %712, %struct.edge_rec** %709, align 4
- %713 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4
- %714 = load %struct.edge_rec*, %struct.edge_rec** %701, align 4
- store %struct.edge_rec* %713, %struct.edge_rec** %701, align 4
- store %struct.edge_rec* %714, %struct.edge_rec** %681, align 4
+ %708 = inttoptr i32 %707 to ptr
+ %709 = getelementptr %struct.edge_rec, ptr %708, i32 0, i32 1
+ %710 = load ptr, ptr %709, align 4
+ %711 = getelementptr %struct.edge_rec, ptr %700, i32 0, i32 1
+ %712 = load ptr, ptr %711, align 4
+ store ptr %710, ptr %711, align 4
+ store ptr %712, ptr %709, align 4
+ %713 = load ptr, ptr %681, align 4
+ %714 = load ptr, ptr %701, align 4
+ store ptr %713, ptr %701, align 4
+ store ptr %714, ptr %681, align 4
%715 = xor i32 %683, 32
- %716 = inttoptr i32 %715 to %struct.edge_rec*
- %717 = getelementptr %struct.edge_rec, %struct.edge_rec* %716, i32 0, i32 1
- %718 = load %struct.edge_rec*, %struct.edge_rec** %717, align 4
- %719 = ptrtoint %struct.edge_rec* %718 to i32
+ %716 = inttoptr i32 %715 to ptr
+ %717 = getelementptr %struct.edge_rec, ptr %716, i32 0, i32 1
+ %718 = load ptr, ptr %717, align 4
+ %719 = ptrtoint ptr %718 to i32
%720 = add i32 %719, 16
%721 = and i32 %720, 63
%722 = and i32 %719, -64
%723 = or i32 %721, %722
- %724 = inttoptr i32 %723 to %struct.edge_rec*
- %725 = getelementptr %struct.edge_rec, %struct.edge_rec* %679, i32 0, i32 1
- %726 = load %struct.edge_rec*, %struct.edge_rec** %725, align 4
- %727 = ptrtoint %struct.edge_rec* %726 to i32
+ %724 = inttoptr i32 %723 to ptr
+ %725 = getelementptr %struct.edge_rec, ptr %679, i32 0, i32 1
+ %726 = load ptr, ptr %725, align 4
+ %727 = ptrtoint ptr %726 to i32
%728 = add i32 %727, 16
%729 = and i32 %728, 63
%730 = and i32 %727, -64
%731 = or i32 %729, %730
- %732 = inttoptr i32 %731 to %struct.edge_rec*
- %733 = getelementptr %struct.edge_rec, %struct.edge_rec* %732, i32 0, i32 1
- %734 = load %struct.edge_rec*, %struct.edge_rec** %733, align 4
- %735 = getelementptr %struct.edge_rec, %struct.edge_rec* %724, i32 0, i32 1
- %736 = load %struct.edge_rec*, %struct.edge_rec** %735, align 4
- store %struct.edge_rec* %734, %struct.edge_rec** %735, align 4
- store %struct.edge_rec* %736, %struct.edge_rec** %733, align 4
- %737 = load %struct.edge_rec*, %struct.edge_rec** %717, align 4
- %738 = load %struct.edge_rec*, %struct.edge_rec** %725, align 4
- store %struct.edge_rec* %737, %struct.edge_rec** %725, align 4
- store %struct.edge_rec* %738, %struct.edge_rec** %717, align 4
- %739 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4
+ %732 = inttoptr i32 %731 to ptr
+ %733 = getelementptr %struct.edge_rec, ptr %732, i32 0, i32 1
+ %734 = load ptr, ptr %733, align 4
+ %735 = getelementptr %struct.edge_rec, ptr %724, i32 0, i32 1
+ %736 = load ptr, ptr %735, align 4
+ store ptr %734, ptr %735, align 4
+ store ptr %736, ptr %733, align 4
+ %737 = load ptr, ptr %717, align 4
+ %738 = load ptr, ptr %725, align 4
+ store ptr %737, ptr %725, align 4
+ store ptr %738, ptr %717, align 4
+ %739 = load ptr, ptr %681, align 4
br label %bb9.i
do_merge.exit:
- %740 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0
- %741 = load %struct.VERTEX*, %struct.VERTEX** %740, align 4
- %742 = icmp eq %struct.VERTEX* %741, %tree_addr.0.i
+ %740 = getelementptr %struct.edge_rec, ptr %ldo_addr.0.ph.i, i32 0, i32 0
+ %741 = load ptr, ptr %740, align 4
+ %742 = icmp eq ptr %741, %tree_addr.0.i
br i1 %742, label %bb5.loopexit, label %bb2
bb2:
- %ldo.07 = phi %struct.edge_rec* [ %747, %bb2 ], [ %ldo_addr.0.ph.i, %do_merge.exit ]
- %743 = ptrtoint %struct.edge_rec* %ldo.07 to i32
+ %ldo.07 = phi ptr [ %747, %bb2 ], [ %ldo_addr.0.ph.i, %do_merge.exit ]
+ %743 = ptrtoint ptr %ldo.07 to i32
%744 = xor i32 %743, 32
- %745 = inttoptr i32 %744 to %struct.edge_rec*
- %746 = getelementptr %struct.edge_rec, %struct.edge_rec* %745, i32 0, i32 1
- %747 = load %struct.edge_rec*, %struct.edge_rec** %746, align 4
- %748 = getelementptr %struct.edge_rec, %struct.edge_rec* %747, i32 0, i32 0
- %749 = load %struct.VERTEX*, %struct.VERTEX** %748, align 4
- %750 = icmp eq %struct.VERTEX* %749, %tree_addr.0.i
+ %745 = inttoptr i32 %744 to ptr
+ %746 = getelementptr %struct.edge_rec, ptr %745, i32 0, i32 1
+ %747 = load ptr, ptr %746, align 4
+ %748 = getelementptr %struct.edge_rec, ptr %747, i32 0, i32 0
+ %749 = load ptr, ptr %748, align 4
+ %750 = icmp eq ptr %749, %tree_addr.0.i
br i1 %750, label %bb5.loopexit, label %bb2
bb4:
- %rdo.05 = phi %struct.edge_rec* [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ]
- %751 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdo.05, i32 0, i32 1
- %752 = load %struct.edge_rec*, %struct.edge_rec** %751, align 4
- %753 = ptrtoint %struct.edge_rec* %752 to i32
+ %rdo.05 = phi ptr [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ]
+ %751 = getelementptr %struct.edge_rec, ptr %rdo.05, i32 0, i32 1
+ %752 = load ptr, ptr %751, align 4
+ %753 = ptrtoint ptr %752 to i32
%754 = xor i32 %753, 32
- %755 = inttoptr i32 %754 to %struct.edge_rec*
- %756 = getelementptr %struct.edge_rec, %struct.edge_rec* %755, i32 0, i32 0
- %757 = load %struct.VERTEX*, %struct.VERTEX** %756, align 4
- %758 = icmp eq %struct.VERTEX* %757, %extra
+ %755 = inttoptr i32 %754 to ptr
+ %756 = getelementptr %struct.edge_rec, ptr %755, i32 0, i32 0
+ %757 = load ptr, ptr %756, align 4
+ %758 = icmp eq ptr %757, %extra
br i1 %758, label %bb6, label %bb4
bb5.loopexit:
- %ldo.0.lcssa = phi %struct.edge_rec* [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ]
- %759 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0
- %760 = load %struct.VERTEX*, %struct.VERTEX** %759, align 4
- %761 = icmp eq %struct.VERTEX* %760, %extra
+ %ldo.0.lcssa = phi ptr [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ]
+ %759 = getelementptr %struct.edge_rec, ptr %rdo_addr.0.i, i32 0, i32 0
+ %760 = load ptr, ptr %759, align 4
+ %761 = icmp eq ptr %760, %extra
br i1 %761, label %bb6, label %bb4
bb6:
- %rdo.0.lcssa = phi %struct.edge_rec* [ %rdo_addr.0.i, %bb5.loopexit ], [ %755, %bb4 ]
- %tmp16 = ptrtoint %struct.edge_rec* %ldo.0.lcssa to i32
- %tmp4 = ptrtoint %struct.edge_rec* %rdo.0.lcssa to i32
+ %rdo.0.lcssa = phi ptr [ %rdo_addr.0.i, %bb5.loopexit ], [ %755, %bb4 ]
+ %tmp16 = ptrtoint ptr %ldo.0.lcssa to i32
+ %tmp4 = ptrtoint ptr %rdo.0.lcssa to i32
br label %bb15
bb7:
- %762 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 1
- %763 = load %struct.VERTEX*, %struct.VERTEX** %762, align 4
- %764 = icmp eq %struct.VERTEX* %763, null
- %765 = call %struct.edge_rec* @alloc_edge() nounwind
- %766 = getelementptr %struct.edge_rec, %struct.edge_rec* %765, i32 0, i32 1
- store %struct.edge_rec* %765, %struct.edge_rec** %766, align 4
- %767 = getelementptr %struct.edge_rec, %struct.edge_rec* %765, i32 0, i32 0
+ %762 = getelementptr %struct.VERTEX, ptr %tree, i32 0, i32 1
+ %763 = load ptr, ptr %762, align 4
+ %764 = icmp eq ptr %763, null
+ %765 = call ptr @alloc_edge() nounwind
+ %766 = getelementptr %struct.edge_rec, ptr %765, i32 0, i32 1
+ store ptr %765, ptr %766, align 4
+ %767 = getelementptr %struct.edge_rec, ptr %765, i32 0, i32 0
br i1 %764, label %bb10, label %bb11
bb8:
- %768 = call i32 @puts(i8* getelementptr ([21 x i8], [21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind
+ %768 = call i32 @puts(ptr @_2E_str7) nounwind
call void @exit(i32 -1) noreturn nounwind
unreachable
bb10:
- store %struct.VERTEX* %tree, %struct.VERTEX** %767, align 4
- %769 = ptrtoint %struct.edge_rec* %765 to i32
+ store ptr %tree, ptr %767, align 4
+ %769 = ptrtoint ptr %765 to i32
%770 = add i32 %769, 16
- %771 = inttoptr i32 %770 to %struct.edge_rec*
+ %771 = inttoptr i32 %770 to ptr
%772 = add i32 %769, 48
- %773 = inttoptr i32 %772 to %struct.edge_rec*
- %774 = getelementptr %struct.edge_rec, %struct.edge_rec* %771, i32 0, i32 1
- store %struct.edge_rec* %773, %struct.edge_rec** %774, align 4
+ %773 = inttoptr i32 %772 to ptr
+ %774 = getelementptr %struct.edge_rec, ptr %771, i32 0, i32 1
+ store ptr %773, ptr %774, align 4
%775 = add i32 %769, 32
- %776 = inttoptr i32 %775 to %struct.edge_rec*
- %777 = getelementptr %struct.edge_rec, %struct.edge_rec* %776, i32 0, i32 1
- store %struct.edge_rec* %776, %struct.edge_rec** %777, align 4
- %778 = getelementptr %struct.edge_rec, %struct.edge_rec* %776, i32 0, i32 0
- store %struct.VERTEX* %extra, %struct.VERTEX** %778, align 4
- %779 = getelementptr %struct.edge_rec, %struct.edge_rec* %773, i32 0, i32 1
- store %struct.edge_rec* %771, %struct.edge_rec** %779, align 4
+ %776 = inttoptr i32 %775 to ptr
+ %777 = getelementptr %struct.edge_rec, ptr %776, i32 0, i32 1
+ store ptr %776, ptr %777, align 4
+ %778 = getelementptr %struct.edge_rec, ptr %776, i32 0, i32 0
+ store ptr %extra, ptr %778, align 4
+ %779 = getelementptr %struct.edge_rec, ptr %773, i32 0, i32 1
+ store ptr %771, ptr %779, align 4
%780 = xor i32 %769, 32
br label %bb15
bb11:
- store %struct.VERTEX* %763, %struct.VERTEX** %767, align 4
- %781 = ptrtoint %struct.edge_rec* %765 to i32
+ store ptr %763, ptr %767, align 4
+ %781 = ptrtoint ptr %765 to i32
%782 = add i32 %781, 16
- %783 = inttoptr i32 %782 to %struct.edge_rec*
+ %783 = inttoptr i32 %782 to ptr
%784 = add i32 %781, 48
- %785 = inttoptr i32 %784 to %struct.edge_rec*
- %786 = getelementptr %struct.edge_rec, %struct.edge_rec* %783, i32 0, i32 1
- store %struct.edge_rec* %785, %struct.edge_rec** %786, align 4
+ %785 = inttoptr i32 %784 to ptr
+ %786 = getelementptr %struct.edge_rec, ptr %783, i32 0, i32 1
+ store ptr %785, ptr %786, align 4
%787 = add i32 %781, 32
- %788 = inttoptr i32 %787 to %struct.edge_rec*
- %789 = getelementptr %struct.edge_rec, %struct.edge_rec* %788, i32 0, i32 1
- store %struct.edge_rec* %788, %struct.edge_rec** %789, align 4
- %790 = getelementptr %struct.edge_rec, %struct.edge_rec* %788, i32 0, i32 0
- store %struct.VERTEX* %tree, %struct.VERTEX** %790, align 4
- %791 = getelementptr %struct.edge_rec, %struct.edge_rec* %785, i32 0, i32 1
- store %struct.edge_rec* %783, %struct.edge_rec** %791, align 4
- %792 = call %struct.edge_rec* @alloc_edge() nounwind
- %793 = getelementptr %struct.edge_rec, %struct.edge_rec* %792, i32 0, i32 1
- store %struct.edge_rec* %792, %struct.edge_rec** %793, align 4
- %794 = getelementptr %struct.edge_rec, %struct.edge_rec* %792, i32 0, i32 0
- store %struct.VERTEX* %tree, %struct.VERTEX** %794, align 4
- %795 = ptrtoint %struct.edge_rec* %792 to i32
+ %788 = inttoptr i32 %787 to ptr
+ %789 = getelementptr %struct.edge_rec, ptr %788, i32 0, i32 1
+ store ptr %788, ptr %789, align 4
+ %790 = getelementptr %struct.edge_rec, ptr %788, i32 0, i32 0
+ store ptr %tree, ptr %790, align 4
+ %791 = getelementptr %struct.edge_rec, ptr %785, i32 0, i32 1
+ store ptr %783, ptr %791, align 4
+ %792 = call ptr @alloc_edge() nounwind
+ %793 = getelementptr %struct.edge_rec, ptr %792, i32 0, i32 1
+ store ptr %792, ptr %793, align 4
+ %794 = getelementptr %struct.edge_rec, ptr %792, i32 0, i32 0
+ store ptr %tree, ptr %794, align 4
+ %795 = ptrtoint ptr %792 to i32
%796 = add i32 %795, 16
- %797 = inttoptr i32 %796 to %struct.edge_rec*
+ %797 = inttoptr i32 %796 to ptr
%798 = add i32 %795, 48
- %799 = inttoptr i32 %798 to %struct.edge_rec*
- %800 = getelementptr %struct.edge_rec, %struct.edge_rec* %797, i32 0, i32 1
- store %struct.edge_rec* %799, %struct.edge_rec** %800, align 4
+ %799 = inttoptr i32 %798 to ptr
+ %800 = getelementptr %struct.edge_rec, ptr %797, i32 0, i32 1
+ store ptr %799, ptr %800, align 4
%801 = add i32 %795, 32
- %802 = inttoptr i32 %801 to %struct.edge_rec*
- %803 = getelementptr %struct.edge_rec, %struct.edge_rec* %802, i32 0, i32 1
- store %struct.edge_rec* %802, %struct.edge_rec** %803, align 4
- %804 = getelementptr %struct.edge_rec, %struct.edge_rec* %802, i32 0, i32 0
- store %struct.VERTEX* %extra, %struct.VERTEX** %804, align 4
- %805 = getelementptr %struct.edge_rec, %struct.edge_rec* %799, i32 0, i32 1
- store %struct.edge_rec* %797, %struct.edge_rec** %805, align 4
+ %802 = inttoptr i32 %801 to ptr
+ %803 = getelementptr %struct.edge_rec, ptr %802, i32 0, i32 1
+ store ptr %802, ptr %803, align 4
+ %804 = getelementptr %struct.edge_rec, ptr %802, i32 0, i32 0
+ store ptr %extra, ptr %804, align 4
+ %805 = getelementptr %struct.edge_rec, ptr %799, i32 0, i32 1
+ store ptr %797, ptr %805, align 4
%806 = xor i32 %781, 32
- %807 = inttoptr i32 %806 to %struct.edge_rec*
- %808 = getelementptr %struct.edge_rec, %struct.edge_rec* %807, i32 0, i32 1
- %809 = load %struct.edge_rec*, %struct.edge_rec** %808, align 4
- %810 = ptrtoint %struct.edge_rec* %809 to i32
+ %807 = inttoptr i32 %806 to ptr
+ %808 = getelementptr %struct.edge_rec, ptr %807, i32 0, i32 1
+ %809 = load ptr, ptr %808, align 4
+ %810 = ptrtoint ptr %809 to i32
%811 = add i32 %810, 16
%812 = and i32 %811, 63
%813 = and i32 %810, -64
%814 = or i32 %812, %813
- %815 = inttoptr i32 %814 to %struct.edge_rec*
- %816 = load %struct.edge_rec*, %struct.edge_rec** %793, align 4
- %817 = ptrtoint %struct.edge_rec* %816 to i32
+ %815 = inttoptr i32 %814 to ptr
+ %816 = load ptr, ptr %793, align 4
+ %817 = ptrtoint ptr %816 to i32
%818 = add i32 %817, 16
%819 = and i32 %818, 63
%820 = and i32 %817, -64
%821 = or i32 %819, %820
- %822 = inttoptr i32 %821 to %struct.edge_rec*
- %823 = getelementptr %struct.edge_rec, %struct.edge_rec* %822, i32 0, i32 1
- %824 = load %struct.edge_rec*, %struct.edge_rec** %823, align 4
- %825 = getelementptr %struct.edge_rec, %struct.edge_rec* %815, i32 0, i32 1
- %826 = load %struct.edge_rec*, %struct.edge_rec** %825, align 4
- store %struct.edge_rec* %824, %struct.edge_rec** %825, align 4
- store %struct.edge_rec* %826, %struct.edge_rec** %823, align 4
- %827 = load %struct.edge_rec*, %struct.edge_rec** %808, align 4
- %828 = load %struct.edge_rec*, %struct.edge_rec** %793, align 4
- store %struct.edge_rec* %827, %struct.edge_rec** %793, align 4
- store %struct.edge_rec* %828, %struct.edge_rec** %808, align 4
+ %822 = inttoptr i32 %821 to ptr
+ %823 = getelementptr %struct.edge_rec, ptr %822, i32 0, i32 1
+ %824 = load ptr, ptr %823, align 4
+ %825 = getelementptr %struct.edge_rec, ptr %815, i32 0, i32 1
+ %826 = load ptr, ptr %825, align 4
+ store ptr %824, ptr %825, align 4
+ store ptr %826, ptr %823, align 4
+ %827 = load ptr, ptr %808, align 4
+ %828 = load ptr, ptr %793, align 4
+ store ptr %827, ptr %793, align 4
+ store ptr %828, ptr %808, align 4
%829 = xor i32 %795, 32
- %830 = inttoptr i32 %829 to %struct.edge_rec*
- %831 = getelementptr %struct.edge_rec, %struct.edge_rec* %830, i32 0, i32 0
- %832 = load %struct.VERTEX*, %struct.VERTEX** %831, align 4
+ %830 = inttoptr i32 %829 to ptr
+ %831 = getelementptr %struct.edge_rec, ptr %830, i32 0, i32 0
+ %832 = load ptr, ptr %831, align 4
%833 = and i32 %798, 63
%834 = and i32 %795, -64
%835 = or i32 %833, %834
- %836 = inttoptr i32 %835 to %struct.edge_rec*
- %837 = getelementptr %struct.edge_rec, %struct.edge_rec* %836, i32 0, i32 1
- %838 = load %struct.edge_rec*, %struct.edge_rec** %837, align 4
- %839 = ptrtoint %struct.edge_rec* %838 to i32
+ %836 = inttoptr i32 %835 to ptr
+ %837 = getelementptr %struct.edge_rec, ptr %836, i32 0, i32 1
+ %838 = load ptr, ptr %837, align 4
+ %839 = ptrtoint ptr %838 to i32
%840 = add i32 %839, 16
%841 = and i32 %840, 63
%842 = and i32 %839, -64
%843 = or i32 %841, %842
- %844 = inttoptr i32 %843 to %struct.edge_rec*
- %845 = load %struct.VERTEX*, %struct.VERTEX** %767, align 4
- %846 = call %struct.edge_rec* @alloc_edge() nounwind
- %847 = getelementptr %struct.edge_rec, %struct.edge_rec* %846, i32 0, i32 1
- store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4
- %848 = getelementptr %struct.edge_rec, %struct.edge_rec* %846, i32 0, i32 0
- store %struct.VERTEX* %832, %struct.VERTEX** %848, align 4
- %849 = ptrtoint %struct.edge_rec* %846 to i32
+ %844 = inttoptr i32 %843 to ptr
+ %845 = load ptr, ptr %767, align 4
+ %846 = call ptr @alloc_edge() nounwind
+ %847 = getelementptr %struct.edge_rec, ptr %846, i32 0, i32 1
+ store ptr %846, ptr %847, align 4
+ %848 = getelementptr %struct.edge_rec, ptr %846, i32 0, i32 0
+ store ptr %832, ptr %848, align 4
+ %849 = ptrtoint ptr %846 to i32
%850 = add i32 %849, 16
- %851 = inttoptr i32 %850 to %struct.edge_rec*
+ %851 = inttoptr i32 %850 to ptr
%852 = add i32 %849, 48
- %853 = inttoptr i32 %852 to %struct.edge_rec*
- %854 = getelementptr %struct.edge_rec, %struct.edge_rec* %851, i32 0, i32 1
- store %struct.edge_rec* %853, %struct.edge_rec** %854, align 4
+ %853 = inttoptr i32 %852 to ptr
+ %854 = getelementptr %struct.edge_rec, ptr %851, i32 0, i32 1
+ store ptr %853, ptr %854, align 4
%855 = add i32 %849, 32
- %856 = inttoptr i32 %855 to %struct.edge_rec*
- %857 = getelementptr %struct.edge_rec, %struct.edge_rec* %856, i32 0, i32 1
- store %struct.edge_rec* %856, %struct.edge_rec** %857, align 4
- %858 = getelementptr %struct.edge_rec, %struct.edge_rec* %856, i32 0, i32 0
- store %struct.VERTEX* %845, %struct.VERTEX** %858, align 4
- %859 = getelementptr %struct.edge_rec, %struct.edge_rec* %853, i32 0, i32 1
- store %struct.edge_rec* %851, %struct.edge_rec** %859, align 4
- %860 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4
- %861 = ptrtoint %struct.edge_rec* %860 to i32
+ %856 = inttoptr i32 %855 to ptr
+ %857 = getelementptr %struct.edge_rec, ptr %856, i32 0, i32 1
+ store ptr %856, ptr %857, align 4
+ %858 = getelementptr %struct.edge_rec, ptr %856, i32 0, i32 0
+ store ptr %845, ptr %858, align 4
+ %859 = getelementptr %struct.edge_rec, ptr %853, i32 0, i32 1
+ store ptr %851, ptr %859, align 4
+ %860 = load ptr, ptr %847, align 4
+ %861 = ptrtoint ptr %860 to i32
%862 = add i32 %861, 16
%863 = and i32 %862, 63
%864 = and i32 %861, -64
%865 = or i32 %863, %864
- %866 = inttoptr i32 %865 to %struct.edge_rec*
- %867 = getelementptr %struct.edge_rec, %struct.edge_rec* %844, i32 0, i32 1
- %868 = load %struct.edge_rec*, %struct.edge_rec** %867, align 4
- %869 = ptrtoint %struct.edge_rec* %868 to i32
+ %866 = inttoptr i32 %865 to ptr
+ %867 = getelementptr %struct.edge_rec, ptr %844, i32 0, i32 1
+ %868 = load ptr, ptr %867, align 4
+ %869 = ptrtoint ptr %868 to i32
%870 = add i32 %869, 16
%871 = and i32 %870, 63
%872 = and i32 %869, -64
%873 = or i32 %871, %872
- %874 = inttoptr i32 %873 to %struct.edge_rec*
- %875 = getelementptr %struct.edge_rec, %struct.edge_rec* %874, i32 0, i32 1
- %876 = load %struct.edge_rec*, %struct.edge_rec** %875, align 4
- %877 = getelementptr %struct.edge_rec, %struct.edge_rec* %866, i32 0, i32 1
- %878 = load %struct.edge_rec*, %struct.edge_rec** %877, align 4
- store %struct.edge_rec* %876, %struct.edge_rec** %877, align 4
- store %struct.edge_rec* %878, %struct.edge_rec** %875, align 4
- %879 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4
- %880 = load %struct.edge_rec*, %struct.edge_rec** %867, align 4
- store %struct.edge_rec* %879, %struct.edge_rec** %867, align 4
- store %struct.edge_rec* %880, %struct.edge_rec** %847, align 4
+ %874 = inttoptr i32 %873 to ptr
+ %875 = getelementptr %struct.edge_rec, ptr %874, i32 0, i32 1
+ %876 = load ptr, ptr %875, align 4
+ %877 = getelementptr %struct.edge_rec, ptr %866, i32 0, i32 1
+ %878 = load ptr, ptr %877, align 4
+ store ptr %876, ptr %877, align 4
+ store ptr %878, ptr %875, align 4
+ %879 = load ptr, ptr %847, align 4
+ %880 = load ptr, ptr %867, align 4
+ store ptr %879, ptr %867, align 4
+ store ptr %880, ptr %847, align 4
%881 = xor i32 %849, 32
- %882 = inttoptr i32 %881 to %struct.edge_rec*
- %883 = getelementptr %struct.edge_rec, %struct.edge_rec* %882, i32 0, i32 1
- %884 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4
- %885 = ptrtoint %struct.edge_rec* %884 to i32
+ %882 = inttoptr i32 %881 to ptr
+ %883 = getelementptr %struct.edge_rec, ptr %882, i32 0, i32 1
+ %884 = load ptr, ptr %883, align 4
+ %885 = ptrtoint ptr %884 to i32
%886 = add i32 %885, 16
%887 = and i32 %886, 63
%888 = and i32 %885, -64
%889 = or i32 %887, %888
- %890 = inttoptr i32 %889 to %struct.edge_rec*
- %891 = load %struct.edge_rec*, %struct.edge_rec** %766, align 4
- %892 = ptrtoint %struct.edge_rec* %891 to i32
+ %890 = inttoptr i32 %889 to ptr
+ %891 = load ptr, ptr %766, align 4
+ %892 = ptrtoint ptr %891 to i32
%893 = add i32 %892, 16
%894 = and i32 %893, 63
%895 = and i32 %892, -64
%896 = or i32 %894, %895
- %897 = inttoptr i32 %896 to %struct.edge_rec*
- %898 = getelementptr %struct.edge_rec, %struct.edge_rec* %897, i32 0, i32 1
- %899 = load %struct.edge_rec*, %struct.edge_rec** %898, align 4
- %900 = getelementptr %struct.edge_rec, %struct.edge_rec* %890, i32 0, i32 1
- %901 = load %struct.edge_rec*, %struct.edge_rec** %900, align 4
- store %struct.edge_rec* %899, %struct.edge_rec** %900, align 4
- store %struct.edge_rec* %901, %struct.edge_rec** %898, align 4
- %902 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4
- %903 = load %struct.edge_rec*, %struct.edge_rec** %766, align 4
- store %struct.edge_rec* %902, %struct.edge_rec** %766, align 4
- store %struct.edge_rec* %903, %struct.edge_rec** %883, align 4
- %904 = getelementptr %struct.VERTEX, %struct.VERTEX* %763, i32 0, i32 0, i32 0
- %905 = load double, double* %904, align 4
- %906 = getelementptr %struct.VERTEX, %struct.VERTEX* %763, i32 0, i32 0, i32 1
- %907 = load double, double* %906, align 4
- %908 = getelementptr %struct.VERTEX, %struct.VERTEX* %extra, i32 0, i32 0, i32 0
- %909 = load double, double* %908, align 4
- %910 = getelementptr %struct.VERTEX, %struct.VERTEX* %extra, i32 0, i32 0, i32 1
- %911 = load double, double* %910, align 4
- %912 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 0, i32 0
- %913 = load double, double* %912, align 4
- %914 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 0, i32 1
- %915 = load double, double* %914, align 4
+ %897 = inttoptr i32 %896 to ptr
+ %898 = getelementptr %struct.edge_rec, ptr %897, i32 0, i32 1
+ %899 = load ptr, ptr %898, align 4
+ %900 = getelementptr %struct.edge_rec, ptr %890, i32 0, i32 1
+ %901 = load ptr, ptr %900, align 4
+ store ptr %899, ptr %900, align 4
+ store ptr %901, ptr %898, align 4
+ %902 = load ptr, ptr %883, align 4
+ %903 = load ptr, ptr %766, align 4
+ store ptr %902, ptr %766, align 4
+ store ptr %903, ptr %883, align 4
+ %904 = getelementptr %struct.VERTEX, ptr %763, i32 0, i32 0, i32 0
+ %905 = load double, ptr %904, align 4
+ %906 = getelementptr %struct.VERTEX, ptr %763, i32 0, i32 0, i32 1
+ %907 = load double, ptr %906, align 4
+ %908 = getelementptr %struct.VERTEX, ptr %extra, i32 0, i32 0, i32 0
+ %909 = load double, ptr %908, align 4
+ %910 = getelementptr %struct.VERTEX, ptr %extra, i32 0, i32 0, i32 1
+ %911 = load double, ptr %910, align 4
+ %912 = getelementptr %struct.VERTEX, ptr %tree, i32 0, i32 0, i32 0
+ %913 = load double, ptr %912, align 4
+ %914 = getelementptr %struct.VERTEX, ptr %tree, i32 0, i32 0, i32 1
+ %915 = load double, ptr %914, align 4
%916 = fsub double %905, %913
%917 = fsub double %911, %915
%918 = fmul double %916, %917
@@ -1226,93 +1226,93 @@ bb14:
%932 = and i32 %850, 63
%933 = and i32 %849, -64
%934 = or i32 %932, %933
- %935 = inttoptr i32 %934 to %struct.edge_rec*
- %936 = getelementptr %struct.edge_rec, %struct.edge_rec* %935, i32 0, i32 1
- %937 = load %struct.edge_rec*, %struct.edge_rec** %936, align 4
- %938 = ptrtoint %struct.edge_rec* %937 to i32
+ %935 = inttoptr i32 %934 to ptr
+ %936 = getelementptr %struct.edge_rec, ptr %935, i32 0, i32 1
+ %937 = load ptr, ptr %936, align 4
+ %938 = ptrtoint ptr %937 to i32
%939 = add i32 %938, 16
%940 = and i32 %939, 63
%941 = and i32 %938, -64
%942 = or i32 %940, %941
- %943 = inttoptr i32 %942 to %struct.edge_rec*
- %944 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4
- %945 = ptrtoint %struct.edge_rec* %944 to i32
+ %943 = inttoptr i32 %942 to ptr
+ %944 = load ptr, ptr %847, align 4
+ %945 = ptrtoint ptr %944 to i32
%946 = add i32 %945, 16
%947 = and i32 %946, 63
%948 = and i32 %945, -64
%949 = or i32 %947, %948
- %950 = inttoptr i32 %949 to %struct.edge_rec*
- %951 = getelementptr %struct.edge_rec, %struct.edge_rec* %943, i32 0, i32 1
- %952 = load %struct.edge_rec*, %struct.edge_rec** %951, align 4
- %953 = ptrtoint %struct.edge_rec* %952 to i32
+ %950 = inttoptr i32 %949 to ptr
+ %951 = getelementptr %struct.edge_rec, ptr %943, i32 0, i32 1
+ %952 = load ptr, ptr %951, align 4
+ %953 = ptrtoint ptr %952 to i32
%954 = add i32 %953, 16
%955 = and i32 %954, 63
%956 = and i32 %953, -64
%957 = or i32 %955, %956
- %958 = inttoptr i32 %957 to %struct.edge_rec*
- %959 = getelementptr %struct.edge_rec, %struct.edge_rec* %958, i32 0, i32 1
- %960 = load %struct.edge_rec*, %struct.edge_rec** %959, align 4
- %961 = getelementptr %struct.edge_rec, %struct.edge_rec* %950, i32 0, i32 1
- %962 = load %struct.edge_rec*, %struct.edge_rec** %961, align 4
- store %struct.edge_rec* %960, %struct.edge_rec** %961, align 4
- store %struct.edge_rec* %962, %struct.edge_rec** %959, align 4
- %963 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4
- %964 = load %struct.edge_rec*, %struct.edge_rec** %951, align 4
- store %struct.edge_rec* %963, %struct.edge_rec** %951, align 4
- store %struct.edge_rec* %964, %struct.edge_rec** %847, align 4
+ %958 = inttoptr i32 %957 to ptr
+ %959 = getelementptr %struct.edge_rec, ptr %958, i32 0, i32 1
+ %960 = load ptr, ptr %959, align 4
+ %961 = getelementptr %struct.edge_rec, ptr %950, i32 0, i32 1
+ %962 = load ptr, ptr %961, align 4
+ store ptr %960, ptr %961, align 4
+ store ptr %962, ptr %959, align 4
+ %963 = load ptr, ptr %847, align 4
+ %964 = load ptr, ptr %951, align 4
+ store ptr %963, ptr %951, align 4
+ store ptr %964, ptr %847, align 4
%965 = add i32 %881, 16
%966 = and i32 %965, 63
%967 = or i32 %966, %933
- %968 = inttoptr i32 %967 to %struct.edge_rec*
- %969 = getelementptr %struct.edge_rec, %struct.edge_rec* %968, i32 0, i32 1
- %970 = load %struct.edge_rec*, %struct.edge_rec** %969, align 4
- %971 = ptrtoint %struct.edge_rec* %970 to i32
+ %968 = inttoptr i32 %967 to ptr
+ %969 = getelementptr %struct.edge_rec, ptr %968, i32 0, i32 1
+ %970 = load ptr, ptr %969, align 4
+ %971 = ptrtoint ptr %970 to i32
%972 = add i32 %971, 16
%973 = and i32 %972, 63
%974 = and i32 %971, -64
%975 = or i32 %973, %974
- %976 = inttoptr i32 %975 to %struct.edge_rec*
- %977 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4
- %978 = ptrtoint %struct.edge_rec* %977 to i32
+ %976 = inttoptr i32 %975 to ptr
+ %977 = load ptr, ptr %883, align 4
+ %978 = ptrtoint ptr %977 to i32
%979 = add i32 %978, 16
%980 = and i32 %979, 63
%981 = and i32 %978, -64
%982 = or i32 %980, %981
- %983 = inttoptr i32 %982 to %struct.edge_rec*
- %984 = getelementptr %struct.edge_rec, %struct.edge_rec* %976, i32 0, i32 1
- %985 = load %struct.edge_rec*, %struct.edge_rec** %984, align 4
- %986 = ptrtoint %struct.edge_rec* %985 to i32
+ %983 = inttoptr i32 %982 to ptr
+ %984 = getelementptr %struct.edge_rec, ptr %976, i32 0, i32 1
+ %985 = load ptr, ptr %984, align 4
+ %986 = ptrtoint ptr %985 to i32
%987 = add i32 %986, 16
%988 = and i32 %987, 63
%989 = and i32 %986, -64
%990 = or i32 %988, %989
- %991 = inttoptr i32 %990 to %struct.edge_rec*
- %992 = getelementptr %struct.edge_rec, %struct.edge_rec* %991, i32 0, i32 1
- %993 = load %struct.edge_rec*, %struct.edge_rec** %992, align 4
- %994 = getelementptr %struct.edge_rec, %struct.edge_rec* %983, i32 0, i32 1
- %995 = load %struct.edge_rec*, %struct.edge_rec** %994, align 4
- store %struct.edge_rec* %993, %struct.edge_rec** %994, align 4
- store %struct.edge_rec* %995, %struct.edge_rec** %992, align 4
- %996 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4
- %997 = load %struct.edge_rec*, %struct.edge_rec** %984, align 4
- store %struct.edge_rec* %996, %struct.edge_rec** %984, align 4
- store %struct.edge_rec* %997, %struct.edge_rec** %883, align 4
- %998 = inttoptr i32 %933 to %struct.edge_rec*
- %999 = load %struct.edge_rec*, %struct.edge_rec** @avail_edge, align 4
- %1000 = getelementptr %struct.edge_rec, %struct.edge_rec* %998, i32 0, i32 1
- store %struct.edge_rec* %999, %struct.edge_rec** %1000, align 4
- store %struct.edge_rec* %998, %struct.edge_rec** @avail_edge, align 4
+ %991 = inttoptr i32 %990 to ptr
+ %992 = getelementptr %struct.edge_rec, ptr %991, i32 0, i32 1
+ %993 = load ptr, ptr %992, align 4
+ %994 = getelementptr %struct.edge_rec, ptr %983, i32 0, i32 1
+ %995 = load ptr, ptr %994, align 4
+ store ptr %993, ptr %994, align 4
+ store ptr %995, ptr %992, align 4
+ %996 = load ptr, ptr %883, align 4
+ %997 = load ptr, ptr %984, align 4
+ store ptr %996, ptr %984, align 4
+ store ptr %997, ptr %883, align 4
+ %998 = inttoptr i32 %933 to ptr
+ %999 = load ptr, ptr @avail_edge, align 4
+ %1000 = getelementptr %struct.edge_rec, ptr %998, i32 0, i32 1
+ store ptr %999, ptr %1000, align 4
+ store ptr %998, ptr @avail_edge, align 4
br label %bb15
bb15:
%retval.1.0 = phi i32 [ %780, %bb10 ], [ %829, %bb13 ], [ %829, %bb14 ], [ %tmp4, %bb6 ], [ %849, %bb11 ]
%retval.0.0 = phi i32 [ %769, %bb10 ], [ %781, %bb13 ], [ %781, %bb14 ], [ %tmp16, %bb6 ], [ %881, %bb11 ]
- %agg.result162 = bitcast %struct.EDGE_PAIR* %agg.result to i64*
+ %agg.result162 = bitcast ptr %agg.result to ptr
%1001 = zext i32 %retval.0.0 to i64
%1002 = zext i32 %retval.1.0 to i64
%1003 = shl i64 %1002, 32
%1004 = or i64 %1003, %1001
- store i64 %1004, i64* %agg.result162, align 4
+ store i64 %1004, ptr %agg.result162, align 4
ret void
}
@@ -1331,8 +1331,8 @@ bb15:
; CHECK: vcmp
; CHECK: vcmp
-declare i32 @puts(i8* nocapture) nounwind
+declare i32 @puts(ptr nocapture) nounwind
declare void @exit(i32) noreturn nounwind
-declare %struct.edge_rec* @alloc_edge() nounwind
+declare ptr @alloc_edge() nounwind
diff --git a/llvm/test/CodeGen/ARM/Windows/wineh-basic.ll b/llvm/test/CodeGen/ARM/Windows/wineh-basic.ll
index c4e809f6dd878..d0bdd66d131d2 100644
--- a/llvm/test/CodeGen/ARM/Windows/wineh-basic.ll
+++ b/llvm/test/CodeGen/ARM/Windows/wineh-basic.ll
@@ -16,9 +16,9 @@ target triple = "thumbv7--windows-msvc19.0.24210"
%class.field = type { i8 }
; Function Attrs: nounwind
-define arm_aapcs_vfpcc void @"\01??1field@@AAA@XZ"(%class.field* nocapture readnone %this) unnamed_addr #0 align 2 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+define arm_aapcs_vfpcc void @"\01??1field@@AAA@XZ"(ptr nocapture readnone %this) unnamed_addr #0 align 2 personality ptr @__CxxFrameHandler3 {
entry:
- invoke arm_aapcs_vfpcc void @free(i8* null)
+ invoke arm_aapcs_vfpcc void @free(ptr null)
to label %invoke.cont unwind label %terminate
invoke.cont: ; preds = %entry
@@ -30,7 +30,7 @@ terminate: ; preds = %entry
unreachable
}
-declare arm_aapcs_vfpcc void @free(i8*) local_unnamed_addr #1
+declare arm_aapcs_vfpcc void @free(ptr) local_unnamed_addr #1
declare arm_aapcs_vfpcc i32 @__CxxFrameHandler3(...)
diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
index 9c2da345956d1..17d1ca65430af 100644
--- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
+++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
@@ -24,7 +24,7 @@ declare arm_aapcs_vfpcc <16 x i8> @get_inputf32(float) local_unnamed_addr
-define arm_aapcs_vfpcc void @aese_zero(<16 x i8>* %0) nounwind {
+define arm_aapcs_vfpcc void @aese_zero(ptr %0) nounwind {
; CHECK-FIX-LABEL: aese_zero:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
@@ -33,14 +33,14 @@ define arm_aapcs_vfpcc void @aese_zero(<16 x i8>* %0) nounwind {
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
- %2 = load <16 x i8>, <16 x i8>* %0, align 8
+ %2 = load <16 x i8>, ptr %0, align 8
%3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> zeroinitializer, <16 x i8> %2)
%4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
- store <16 x i8> %4, <16 x i8>* %0, align 8
+ store <16 x i8> %4, ptr %0, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_via_call1(<16 x i8>* %0) nounwind {
+define arm_aapcs_vfpcc void @aese_via_call1(ptr %0) nounwind {
; CHECK-FIX-LABEL: aese_via_call1:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
@@ -54,14 +54,14 @@ define arm_aapcs_vfpcc void @aese_via_call1(<16 x i8>* %0) nounwind {
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
- %3 = load <16 x i8>, <16 x i8>* %0, align 8
+ %3 = load <16 x i8>, ptr %0, align 8
%4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %2, <16 x i8> %3)
%5 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %4)
- store <16 x i8> %5, <16 x i8>* %0, align 8
+ store <16 x i8> %5, ptr %0, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_via_call2(half %0, <16 x i8>* %1) nounwind {
+define arm_aapcs_vfpcc void @aese_via_call2(half %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aese_via_call2:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
@@ -75,14 +75,14 @@ define arm_aapcs_vfpcc void @aese_via_call2(half %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
- %4 = load <16 x i8>, <16 x i8>* %1, align 8
+ %4 = load <16 x i8>, ptr %1, align 8
%5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
- store <16 x i8> %6, <16 x i8>* %1, align 8
+ store <16 x i8> %6, ptr %1, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_via_call3(float %0, <16 x i8>* %1) nounwind {
+define arm_aapcs_vfpcc void @aese_via_call3(float %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aese_via_call3:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
@@ -96,14 +96,14 @@ define arm_aapcs_vfpcc void @aese_via_call3(float %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
- %4 = load <16 x i8>, <16 x i8>* %1, align 8
+ %4 = load <16 x i8>, ptr %1, align 8
%5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
- store <16 x i8> %6, <16 x i8>* %1, align 8
+ store <16 x i8> %6, ptr %1, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
+define arm_aapcs_vfpcc void @aese_once_via_ptr(ptr %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aese_once_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
@@ -112,11 +112,11 @@ define arm_aapcs_vfpcc void @aese_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nou
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %3 = load <16 x i8>, <16 x i8>* %1, align 8
- %4 = load <16 x i8>, <16 x i8>* %0, align 8
+ %3 = load <16 x i8>, ptr %1, align 8
+ %4 = load <16 x i8>, ptr %0, align 8
%5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
- store <16 x i8> %6, <16 x i8>* %1, align 8
+ store <16 x i8> %6, ptr %1, align 8
ret void
}
@@ -133,7 +133,7 @@ define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %4
}
-define arm_aapcs_vfpcc void @aese_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
+define arm_aapcs_vfpcc void @aese_twice_via_ptr(ptr %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aese_twice_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
@@ -146,15 +146,15 @@ define arm_aapcs_vfpcc void @aese_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) no
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %3 = load <16 x i8>, <16 x i8>* %1, align 8
- %4 = load <16 x i8>, <16 x i8>* %0, align 8
+ %3 = load <16 x i8>, ptr %1, align 8
+ %4 = load <16 x i8>, ptr %0, align 8
%5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
- store <16 x i8> %6, <16 x i8>* %1, align 8
- %7 = load <16 x i8>, <16 x i8>* %0, align 8
+ store <16 x i8> %6, ptr %1, align 8
+ %7 = load <16 x i8>, ptr %0, align 8
%8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
%9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
- store <16 x i8> %9, <16 x i8>* %1, align 8
+ store <16 x i8> %9, ptr %1, align 8
ret void
}
@@ -176,7 +176,7 @@ define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %6
}
-define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, ptr %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
@@ -214,11 +214,11 @@ define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>*
6:
%7 = phi i32 [ %12, %6 ], [ 0, %3 ]
- %8 = load <16 x i8>, <16 x i8>* %2, align 8
- %9 = load <16 x i8>, <16 x i8>* %1, align 8
+ %8 = load <16 x i8>, ptr %2, align 8
+ %9 = load <16 x i8>, ptr %1, align 8
%10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %9)
%11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
- store <16 x i8> %11, <16 x i8>* %2, align 8
+ store <16 x i8> %11, ptr %2, align 8
%12 = add nuw i32 %7, 1
%13 = icmp eq i32 %12, %0
br i1 %13, label %5, label %6
@@ -256,7 +256,7 @@ define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x
br i1 %13, label %5, label %7
}
-define arm_aapcs_vfpcc void @aese_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_set8_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set8_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -280,17 +280,17 @@ define arm_aapcs_vfpcc void @aese_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>*
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %4 = load i8, i8* %0, align 1
- %5 = load <16 x i8>, <16 x i8>* %2, align 8
+ %4 = load i8, ptr %0, align 1
+ %5 = load <16 x i8>, ptr %2, align 8
%6 = insertelement <16 x i8> %5, i8 %4, i64 0
%7 = insertelement <16 x i8> %1, i8 %4, i64 0
%8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
%9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
- store <16 x i8> %9, <16 x i8>* %2, align 8
+ store <16 x i8> %9, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_set8_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -301,16 +301,16 @@ define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %4 = load <16 x i8>, <16 x i8>* %2, align 8
+ %4 = load <16 x i8>, ptr %2, align 8
%5 = insertelement <16 x i8> %4, i8 %0, i64 0
%6 = insertelement <16 x i8> %1, i8 %0, i64 0
%7 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %5, <16 x i8> %6)
%8 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %7)
- store <16 x i8> %8, <16 x i8>* %2, align 8
+ store <16 x i8> %8, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -336,13 +336,13 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x
br i1 %0, label %5, label %9
5:
- %6 = load i8, i8* %1, align 1
- %7 = load <16 x i8>, <16 x i8>* %3, align 8
+ %6 = load i8, ptr %1, align 1
+ %7 = load <16 x i8>, ptr %3, align 8
%8 = insertelement <16 x i8> %7, i8 %6, i64 0
br label %11
9:
- %10 = load <16 x i8>, <16 x i8>* %3, align 8
+ %10 = load <16 x i8>, ptr %3, align 8
br label %11
11:
@@ -350,7 +350,7 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x
br i1 %0, label %13, label %16
13:
- %14 = load i8, i8* %1, align 1
+ %14 = load i8, ptr %1, align 1
%15 = insertelement <16 x i8> %2, i8 %14, i64 0
br label %16
@@ -358,11 +358,11 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x
%17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %17)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -381,18 +381,18 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = load <16 x i8>, <16 x i8>* %3, align 8
+ %5 = load <16 x i8>, ptr %3, align 8
%6 = insertelement <16 x i8> %5, i8 %1, i64 0
%7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
%8 = insertelement <16 x i8> %2, i8 %1, i64 0
%9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
%10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %9)
%11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
- store <16 x i8> %11, <16 x i8>* %3, align 8
+ store <16 x i8> %11, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -411,19 +411,19 @@ define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = load i8, i8* %1, align 1
+ %5 = load i8, ptr %1, align 1
%6 = insertelement <16 x i8> %2, i8 %5, i64 0
- %7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0
- store i8 %5, i8* %7, align 8
+ %7 = getelementptr inbounds <16 x i8>, ptr %3, i32 0, i32 0
+ store i8 %5, ptr %7, align 8
%8 = icmp eq i32 %0, 0
br i1 %8, label %12, label %9
9:
- %10 = load <16 x i8>, <16 x i8>* %3, align 8
+ %10 = load <16 x i8>, ptr %3, align 8
br label %13
11:
- store <16 x i8> %17, <16 x i8>* %3, align 8
+ store <16 x i8> %17, ptr %3, align 8
br label %12
12:
@@ -439,7 +439,7 @@ define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2
br i1 %19, label %11, label %13
}
-define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -462,11 +462,11 @@ define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x
6:
%7 = insertelement <16 x i8> %2, i8 %1, i64 0
- %8 = load <16 x i8>, <16 x i8>* %3, align 8
+ %8 = load <16 x i8>, ptr %3, align 8
br label %11
9:
- store <16 x i8> %16, <16 x i8>* %3, align 8
+ store <16 x i8> %16, ptr %3, align 8
br label %10
10:
@@ -483,7 +483,7 @@ define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x
br i1 %18, label %9, label %11
}
-define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_set16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -507,9 +507,9 @@ define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %4 = load i16, i16* %0, align 2
- %5 = bitcast <16 x i8>* %2 to <8 x i16>*
- %6 = load <8 x i16>, <8 x i16>* %5, align 8
+ %4 = load i16, ptr %0, align 2
+ %5 = bitcast ptr %2 to ptr
+ %6 = load <8 x i16>, ptr %5, align 8
%7 = insertelement <8 x i16> %6, i16 %4, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <8 x i16>
@@ -517,11 +517,11 @@ define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>
%11 = bitcast <8 x i16> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
- store <16 x i8> %13, <16 x i8>* %2, align 8
+ store <16 x i8> %13, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_set16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -532,8 +532,8 @@ define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %4 = bitcast <16 x i8>* %2 to <8 x i16>*
- %5 = load <8 x i16>, <8 x i16>* %4, align 8
+ %4 = bitcast ptr %2 to ptr
+ %5 = load <8 x i16>, ptr %4, align 8
%6 = insertelement <8 x i16> %5, i16 %0, i64 0
%7 = bitcast <8 x i16> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <8 x i16>
@@ -541,11 +541,11 @@ define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <1
%10 = bitcast <8 x i16> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
- store <16 x i8> %12, <16 x i8>* %2, align 8
+ store <16 x i8> %12, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -571,15 +571,15 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16
br i1 %0, label %5, label %10
5:
- %6 = load i16, i16* %1, align 2
- %7 = bitcast <16 x i8>* %3 to <8 x i16>*
- %8 = load <8 x i16>, <8 x i16>* %7, align 8
+ %6 = load i16, ptr %1, align 2
+ %7 = bitcast ptr %3 to ptr
+ %8 = load <8 x i16>, ptr %7, align 8
%9 = insertelement <8 x i16> %8, i16 %6, i64 0
br label %13
10:
- %11 = bitcast <16 x i8>* %3 to <8 x i16>*
- %12 = load <8 x i16>, <8 x i16>* %11, align 8
+ %11 = bitcast ptr %3 to ptr
+ %12 = load <8 x i16>, ptr %11, align 8
br label %13
13:
@@ -587,7 +587,7 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16
br i1 %0, label %15, label %19
15:
- %16 = load i16, i16* %1, align 2
+ %16 = load i16, ptr %1, align 2
%17 = bitcast <16 x i8> %2 to <8 x i16>
%18 = insertelement <8 x i16> %17, i16 %16, i64 0
br label %21
@@ -602,11 +602,11 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16
%24 = bitcast <8 x i16> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
- store <16 x i8> %26, <16 x i8>* %3, align 8
+ store <16 x i8> %26, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -625,8 +625,8 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = bitcast <16 x i8>* %3 to <8 x i16>*
- %6 = load <8 x i16>, <8 x i16>* %5, align 8
+ %5 = bitcast ptr %3 to ptr
+ %6 = load <8 x i16>, ptr %5, align 8
%7 = insertelement <8 x i16> %6, i16 %1, i64 0
%8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
%9 = bitcast <16 x i8> %2 to <8 x i16>
@@ -636,11 +636,11 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext
%13 = bitcast <8 x i16> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
- store <16 x i8> %15, <16 x i8>* %3, align 8
+ store <16 x i8> %15, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -659,21 +659,21 @@ define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8>
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = load i16, i16* %1, align 2
+ %5 = load i16, ptr %1, align 2
%6 = bitcast <16 x i8> %2 to <8 x i16>
%7 = insertelement <8 x i16> %6, i16 %5, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
- %9 = bitcast <16 x i8>* %3 to i16*
- store i16 %5, i16* %9, align 8
+ %9 = bitcast ptr %3 to ptr
+ store i16 %5, ptr %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11
11:
- %12 = load <16 x i8>, <16 x i8>* %3, align 8
+ %12 = load <16 x i8>, ptr %3, align 8
br label %15
13:
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
br label %14
14:
@@ -689,7 +689,7 @@ define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8>
br i1 %21, label %13, label %15
}
-define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -714,8 +714,8 @@ define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = insertelement <8 x i16> %7, i16 %1, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
- %10 = bitcast <16 x i8>* %3 to <8 x i16>*
- %11 = bitcast <16 x i8>* %3 to i16*
+ %10 = bitcast ptr %3 to ptr
+ %11 = bitcast ptr %3 to ptr
br label %13
12:
@@ -723,19 +723,19 @@ define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16
13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
- %15 = load <8 x i16>, <8 x i16>* %10, align 8
+ %15 = load <8 x i16>, ptr %10, align 8
%16 = insertelement <8 x i16> %15, i16 %1, i64 0
%17 = bitcast <8 x i16> %16 to <16 x i8>
- store i16 %1, i16* %11, align 8
+ store i16 %1, ptr %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
-define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_set32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set32_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -759,9 +759,9 @@ define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %4 = load i32, i32* %0, align 4
- %5 = bitcast <16 x i8>* %2 to <4 x i32>*
- %6 = load <4 x i32>, <4 x i32>* %5, align 8
+ %4 = load i32, ptr %0, align 4
+ %5 = bitcast ptr %2 to ptr
+ %6 = load <4 x i32>, ptr %5, align 8
%7 = insertelement <4 x i32> %6, i32 %4, i64 0
%8 = bitcast <4 x i32> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <4 x i32>
@@ -769,11 +769,11 @@ define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>
%11 = bitcast <4 x i32> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
- store <16 x i8> %13, <16 x i8>* %2, align 8
+ store <16 x i8> %13, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_set32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -784,8 +784,8 @@ define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>*
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %4 = bitcast <16 x i8>* %2 to <4 x i32>*
- %5 = load <4 x i32>, <4 x i32>* %4, align 8
+ %4 = bitcast ptr %2 to ptr
+ %5 = load <4 x i32>, ptr %4, align 8
%6 = insertelement <4 x i32> %5, i32 %0, i64 0
%7 = bitcast <4 x i32> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <4 x i32>
@@ -793,11 +793,11 @@ define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>*
%10 = bitcast <4 x i32> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
- store <16 x i8> %12, <16 x i8>* %2, align 8
+ store <16 x i8> %12, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -823,15 +823,15 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16
br i1 %0, label %5, label %10
5:
- %6 = load i32, i32* %1, align 4
- %7 = bitcast <16 x i8>* %3 to <4 x i32>*
- %8 = load <4 x i32>, <4 x i32>* %7, align 8
+ %6 = load i32, ptr %1, align 4
+ %7 = bitcast ptr %3 to ptr
+ %8 = load <4 x i32>, ptr %7, align 8
%9 = insertelement <4 x i32> %8, i32 %6, i64 0
br label %13
10:
- %11 = bitcast <16 x i8>* %3 to <4 x i32>*
- %12 = load <4 x i32>, <4 x i32>* %11, align 8
+ %11 = bitcast ptr %3 to ptr
+ %12 = load <4 x i32>, ptr %11, align 8
br label %13
13:
@@ -839,7 +839,7 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16
br i1 %0, label %15, label %19
15:
- %16 = load i32, i32* %1, align 4
+ %16 = load i32, ptr %1, align 4
%17 = bitcast <16 x i8> %2 to <4 x i32>
%18 = insertelement <4 x i32> %17, i32 %16, i64 0
br label %21
@@ -854,11 +854,11 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16
%24 = bitcast <4 x i32> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
- store <16 x i8> %26, <16 x i8>* %3, align 8
+ store <16 x i8> %26, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -877,8 +877,8 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = bitcast <16 x i8>* %3 to <4 x i32>*
- %6 = load <4 x i32>, <4 x i32>* %5, align 8
+ %5 = bitcast ptr %3 to ptr
+ %6 = load <4 x i32>, ptr %5, align 8
%7 = insertelement <4 x i32> %6, i32 %1, i64 0
%8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
%9 = bitcast <16 x i8> %2 to <4 x i32>
@@ -888,11 +888,11 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16
%13 = bitcast <4 x i32> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
- store <16 x i8> %15, <16 x i8>* %3, align 8
+ store <16 x i8> %15, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -911,21 +911,21 @@ define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8>
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = load i32, i32* %1, align 4
+ %5 = load i32, ptr %1, align 4
%6 = bitcast <16 x i8> %2 to <4 x i32>
%7 = insertelement <4 x i32> %6, i32 %5, i64 0
%8 = bitcast <4 x i32> %7 to <16 x i8>
- %9 = bitcast <16 x i8>* %3 to i32*
- store i32 %5, i32* %9, align 8
+ %9 = bitcast ptr %3 to ptr
+ store i32 %5, ptr %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11
11:
- %12 = load <16 x i8>, <16 x i8>* %3, align 8
+ %12 = load <16 x i8>, ptr %3, align 8
br label %15
13:
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
br label %14
14:
@@ -941,7 +941,7 @@ define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8>
br i1 %21, label %13, label %15
}
-define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -966,8 +966,8 @@ define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %
%7 = bitcast <16 x i8> %2 to <4 x i32>
%8 = insertelement <4 x i32> %7, i32 %1, i64 0
%9 = bitcast <4 x i32> %8 to <16 x i8>
- %10 = bitcast <16 x i8>* %3 to <4 x i32>*
- %11 = bitcast <16 x i8>* %3 to i32*
+ %10 = bitcast ptr %3 to ptr
+ %11 = bitcast ptr %3 to ptr
br label %13
12:
@@ -975,19 +975,19 @@ define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %
13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
- %15 = load <4 x i32>, <4 x i32>* %10, align 8
+ %15 = load <4 x i32>, ptr %10, align 8
%16 = insertelement <4 x i32> %15, i32 %1, i64 0
%17 = bitcast <4 x i32> %16 to <16 x i8>
- store i32 %1, i32* %11, align 8
+ store i32 %1, ptr %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
-define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_set64_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -1009,9 +1009,9 @@ define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %4 = load i64, i64* %0, align 8
- %5 = bitcast <16 x i8>* %2 to <2 x i64>*
- %6 = load <2 x i64>, <2 x i64>* %5, align 8
+ %4 = load i64, ptr %0, align 8
+ %5 = bitcast ptr %2 to ptr
+ %6 = load <2 x i64>, ptr %5, align 8
%7 = insertelement <2 x i64> %6, i64 %4, i64 0
%8 = bitcast <2 x i64> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <2 x i64>
@@ -1019,11 +1019,11 @@ define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>
%11 = bitcast <2 x i64> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
- store <16 x i8> %13, <16 x i8>* %2, align 8
+ store <16 x i8> %13, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_set64_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -1036,8 +1036,8 @@ define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>*
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %4 = bitcast <16 x i8>* %2 to <2 x i64>*
- %5 = load <2 x i64>, <2 x i64>* %4, align 8
+ %4 = bitcast ptr %2 to ptr
+ %5 = load <2 x i64>, ptr %4, align 8
%6 = insertelement <2 x i64> %5, i64 %0, i64 0
%7 = bitcast <2 x i64> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <2 x i64>
@@ -1045,11 +1045,11 @@ define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>*
%10 = bitcast <2 x i64> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
- store <16 x i8> %12, <16 x i8>* %2, align 8
+ store <16 x i8> %12, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_cond_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
@@ -1091,15 +1091,15 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16
br i1 %0, label %5, label %10
5:
- %6 = load i64, i64* %1, align 8
- %7 = bitcast <16 x i8>* %3 to <2 x i64>*
- %8 = load <2 x i64>, <2 x i64>* %7, align 8
+ %6 = load i64, ptr %1, align 8
+ %7 = bitcast ptr %3 to ptr
+ %8 = load <2 x i64>, ptr %7, align 8
%9 = insertelement <2 x i64> %8, i64 %6, i64 0
br label %13
10:
- %11 = bitcast <16 x i8>* %3 to <2 x i64>*
- %12 = load <2 x i64>, <2 x i64>* %11, align 8
+ %11 = bitcast ptr %3 to ptr
+ %12 = load <2 x i64>, ptr %11, align 8
br label %13
13:
@@ -1107,7 +1107,7 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16
br i1 %0, label %15, label %19
15:
- %16 = load i64, i64* %1, align 8
+ %16 = load i64, ptr %1, align 8
%17 = bitcast <16 x i8> %2 to <2 x i64>
%18 = insertelement <2 x i64> %17, i64 %16, i64 0
br label %21
@@ -1122,11 +1122,11 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16
%24 = bitcast <2 x i64> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
- store <16 x i8> %26, <16 x i8>* %3, align 8
+ store <16 x i8> %26, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set64_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -1148,8 +1148,8 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %5 = bitcast <16 x i8>* %3 to <2 x i64>*
- %6 = load <2 x i64>, <2 x i64>* %5, align 8
+ %5 = bitcast ptr %3 to ptr
+ %6 = load <2 x i64>, ptr %5, align 8
%7 = insertelement <2 x i64> %6, i64 %1, i64 0
%8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
%9 = bitcast <16 x i8> %2 to <2 x i64>
@@ -1159,11 +1159,11 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16
%13 = bitcast <2 x i64> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
- store <16 x i8> %15, <16 x i8>* %3, align 8
+ store <16 x i8> %15, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -1206,21 +1206,21 @@ define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8>
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc}
- %5 = load i64, i64* %1, align 8
+ %5 = load i64, ptr %1, align 8
%6 = bitcast <16 x i8> %2 to <2 x i64>
%7 = insertelement <2 x i64> %6, i64 %5, i64 0
%8 = bitcast <2 x i64> %7 to <16 x i8>
- %9 = bitcast <16 x i8>* %3 to i64*
- store i64 %5, i64* %9, align 8
+ %9 = bitcast ptr %3 to ptr
+ store i64 %5, ptr %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11
11:
- %12 = load <16 x i8>, <16 x i8>* %3, align 8
+ %12 = load <16 x i8>, ptr %3, align 8
br label %15
13:
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
br label %14
14:
@@ -1236,7 +1236,7 @@ define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8>
br i1 %21, label %13, label %15
}
-define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set64_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -1264,8 +1264,8 @@ define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %
%7 = bitcast <16 x i8> %2 to <2 x i64>
%8 = insertelement <2 x i64> %7, i64 %1, i64 0
%9 = bitcast <2 x i64> %8 to <16 x i8>
- %10 = bitcast <16 x i8>* %3 to <2 x i64>*
- %11 = bitcast <16 x i8>* %3 to i64*
+ %10 = bitcast ptr %3 to ptr
+ %11 = bitcast ptr %3 to ptr
br label %13
12:
@@ -1273,19 +1273,19 @@ define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %
13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
- %15 = load <2 x i64>, <2 x i64>* %10, align 8
+ %15 = load <2 x i64>, ptr %10, align 8
%16 = insertelement <2 x i64> %15, i64 %1, i64 0
%17 = bitcast <2 x i64> %16 to <16 x i8>
- store i64 %1, i64* %11, align 8
+ store i64 %1, ptr %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
-define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_setf16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -1309,10 +1309,10 @@ define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %4 = bitcast half* %0 to i16*
- %5 = load i16, i16* %4, align 2
- %6 = bitcast <16 x i8>* %2 to <8 x i16>*
- %7 = load <8 x i16>, <8 x i16>* %6, align 8
+ %4 = bitcast ptr %0 to ptr
+ %5 = load i16, ptr %4, align 2
+ %6 = bitcast ptr %2 to ptr
+ %7 = load <8 x i16>, ptr %6, align 8
%8 = insertelement <8 x i16> %7, i16 %5, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
%10 = bitcast <16 x i8> %1 to <8 x i16>
@@ -1320,11 +1320,11 @@ define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i
%12 = bitcast <8 x i16> %11 to <16 x i8>
%13 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %12)
%14 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %13)
- store <16 x i8> %14, <16 x i8>* %2, align 8
+ store <16 x i8> %14, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_setf16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
@@ -1336,8 +1336,8 @@ define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
- %4 = bitcast <16 x i8>* %2 to <8 x i16>*
- %5 = load <8 x i16>, <8 x i16>* %4, align 8
+ %4 = bitcast ptr %2 to ptr
+ %5 = load <8 x i16>, ptr %4, align 8
%6 = bitcast half %0 to i16
%7 = insertelement <8 x i16> %5, i16 %6, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
@@ -1346,11 +1346,11 @@ define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8
%11 = bitcast <8 x i16> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
- store <16 x i8> %13, <16 x i8>* %2, align 8
+ store <16 x i8> %13, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -1532,17 +1532,17 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
br i1 %0, label %5, label %12
5:
- %6 = bitcast half* %1 to i16*
- %7 = load i16, i16* %6, align 2
- %8 = bitcast <16 x i8>* %3 to <8 x i16>*
- %9 = load <8 x i16>, <8 x i16>* %8, align 8
+ %6 = bitcast ptr %1 to ptr
+ %7 = load i16, ptr %6, align 2
+ %8 = bitcast ptr %3 to ptr
+ %9 = load <8 x i16>, ptr %8, align 8
%10 = insertelement <8 x i16> %9, i16 %7, i64 0
%11 = bitcast <8 x i16> %10 to <8 x half>
br label %15
12:
- %13 = bitcast <16 x i8>* %3 to <8 x half>*
- %14 = load <8 x half>, <8 x half>* %13, align 8
+ %13 = bitcast ptr %3 to ptr
+ %14 = load <8 x half>, ptr %13, align 8
br label %15
15:
@@ -1550,8 +1550,8 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
br i1 %0, label %17, label %23
17:
- %18 = bitcast half* %1 to i16*
- %19 = load i16, i16* %18, align 2
+ %18 = bitcast ptr %1 to ptr
+ %19 = load i16, ptr %18, align 2
%20 = bitcast <16 x i8> %2 to <8 x i16>
%21 = insertelement <8 x i16> %20, i16 %19, i64 0
%22 = bitcast <8 x i16> %21 to <8 x half>
@@ -1567,11 +1567,11 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
%28 = bitcast <8 x half> %26 to <16 x i8>
%29 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %27, <16 x i8> %28)
%30 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %29)
- store <16 x i8> %30, <16 x i8>* %3, align 8
+ store <16 x i8> %30, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -1753,16 +1753,16 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1
br i1 %0, label %5, label %11
5:
- %6 = bitcast <16 x i8>* %3 to <8 x i16>*
- %7 = load <8 x i16>, <8 x i16>* %6, align 8
+ %6 = bitcast ptr %3 to ptr
+ %7 = load <8 x i16>, ptr %6, align 8
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <8 x half>
br label %14
11:
- %12 = bitcast <16 x i8>* %3 to <8 x half>*
- %13 = load <8 x half>, <8 x half>* %12, align 8
+ %12 = bitcast ptr %3 to ptr
+ %13 = load <8 x half>, ptr %12, align 8
br label %14
14:
@@ -1786,11 +1786,11 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1
%26 = bitcast <8 x half> %24 to <16 x i8>
%27 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %25, <16 x i8> %26)
%28 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %27)
- store <16 x i8> %28, <16 x i8>* %3, align 8
+ store <16 x i8> %28, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_setf16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -1809,22 +1809,22 @@ define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = bitcast half* %1 to i16*
- %6 = load i16, i16* %5, align 2
+ %5 = bitcast ptr %1 to ptr
+ %6 = load i16, ptr %5, align 2
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = insertelement <8 x i16> %7, i16 %6, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
- %10 = bitcast <16 x i8>* %3 to i16*
- store i16 %6, i16* %10, align 8
+ %10 = bitcast ptr %3 to ptr
+ store i16 %6, ptr %10, align 8
%11 = icmp eq i32 %0, 0
br i1 %11, label %15, label %12
12:
- %13 = load <16 x i8>, <16 x i8>* %3, align 8
+ %13 = load <16 x i8>, ptr %3, align 8
br label %16
14:
- store <16 x i8> %20, <16 x i8>* %3, align 8
+ store <16 x i8> %20, ptr %3, align 8
br label %15
15:
@@ -1840,7 +1840,7 @@ define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8
br i1 %22, label %14, label %16
}
-define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_setf16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
@@ -1867,8 +1867,8 @@ define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8>
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <16 x i8>
- %11 = bitcast <16 x i8>* %3 to <8 x i16>*
- %12 = bitcast <16 x i8>* %3 to half*
+ %11 = bitcast ptr %3 to ptr
+ %12 = bitcast ptr %3 to ptr
br label %14
13:
@@ -1876,19 +1876,19 @@ define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8>
14:
%15 = phi i32 [ 0, %6 ], [ %21, %14 ]
- %16 = load <8 x i16>, <8 x i16>* %11, align 8
+ %16 = load <8 x i16>, ptr %11, align 8
%17 = insertelement <8 x i16> %16, i16 %8, i64 0
%18 = bitcast <8 x i16> %17 to <16 x i8>
- store half %1, half* %12, align 8
+ store half %1, ptr %12, align 8
%19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %18, <16 x i8> %10)
%20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
- store <16 x i8> %20, <16 x i8>* %3, align 8
+ store <16 x i8> %20, ptr %3, align 8
%21 = add nuw i32 %15, 1
%22 = icmp eq i32 %21, %0
br i1 %22, label %13, label %14
}
-define arm_aapcs_vfpcc void @aese_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_setf32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_setf32_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vldr s0, [r0]
@@ -1900,9 +1900,9 @@ define arm_aapcs_vfpcc void @aese_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x
; CHECK-FIX-NEXT: aesmc.8 q8, q1
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %4 = load float, float* %0, align 4
- %5 = bitcast <16 x i8>* %2 to <4 x float>*
- %6 = load <4 x float>, <4 x float>* %5, align 8
+ %4 = load float, ptr %0, align 4
+ %5 = bitcast ptr %2 to ptr
+ %6 = load <4 x float>, ptr %5, align 8
%7 = insertelement <4 x float> %6, float %4, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <4 x float>
@@ -1910,11 +1910,11 @@ define arm_aapcs_vfpcc void @aese_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x
%11 = bitcast <4 x float> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
- store <16 x i8> %13, <16 x i8>* %2, align 8
+ store <16 x i8> %13, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_setf32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vmov.f32 s4, s0
@@ -1926,8 +1926,8 @@ define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, <16 x i
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
- %4 = bitcast <16 x i8>* %2 to <4 x float>*
- %5 = load <4 x float>, <4 x float>* %4, align 8
+ %4 = bitcast ptr %2 to ptr
+ %5 = load <4 x float>, ptr %4, align 8
%6 = insertelement <4 x float> %5, float %0, i64 0
%7 = bitcast <4 x float> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <4 x float>
@@ -1935,11 +1935,11 @@ define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, <16 x i
%10 = bitcast <4 x float> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
- store <16 x i8> %12, <16 x i8>* %2, align 8
+ store <16 x i8> %12, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_setf32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -1965,15 +1965,15 @@ define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1,
br i1 %0, label %5, label %10
5:
- %6 = load float, float* %1, align 4
- %7 = bitcast <16 x i8>* %3 to <4 x float>*
- %8 = load <4 x float>, <4 x float>* %7, align 8
+ %6 = load float, ptr %1, align 4
+ %7 = bitcast ptr %3 to ptr
+ %8 = load <4 x float>, ptr %7, align 8
%9 = insertelement <4 x float> %8, float %6, i64 0
br label %13
10:
- %11 = bitcast <16 x i8>* %3 to <4 x float>*
- %12 = load <4 x float>, <4 x float>* %11, align 8
+ %11 = bitcast ptr %3 to ptr
+ %12 = load <4 x float>, ptr %11, align 8
br label %13
13:
@@ -1981,7 +1981,7 @@ define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1,
br i1 %0, label %15, label %19
15:
- %16 = load float, float* %1, align 4
+ %16 = load float, ptr %1, align 4
%17 = bitcast <16 x i8> %2 to <4 x float>
%18 = insertelement <4 x float> %17, float %16, i64 0
br label %21
@@ -1996,11 +1996,11 @@ define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1,
%24 = bitcast <4 x float> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
- store <16 x i8> %26, <16 x i8>* %3, align 8
+ store <16 x i8> %26, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_cond_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
@@ -2028,8 +2028,8 @@ define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %5 = bitcast <16 x i8>* %3 to <4 x float>*
- %6 = load <4 x float>, <4 x float>* %5, align 8
+ %5 = bitcast ptr %3 to ptr
+ %6 = load <4 x float>, ptr %5, align 8
%7 = insertelement <4 x float> %6, float %1, i64 0
%8 = select i1 %0, <4 x float> %7, <4 x float> %6
%9 = bitcast <16 x i8> %2 to <4 x float>
@@ -2039,11 +2039,11 @@ define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <
%13 = bitcast <4 x float> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
- store <16 x i8> %15, <16 x i8>* %3, align 8
+ store <16 x i8> %15, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1]
@@ -2081,21 +2081,21 @@ define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %5 = load float, float* %1, align 4
+ %5 = load float, ptr %1, align 4
%6 = bitcast <16 x i8> %2 to <4 x float>
%7 = insertelement <4 x float> %6, float %5, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
- %9 = bitcast <16 x i8>* %3 to float*
- store float %5, float* %9, align 8
+ %9 = bitcast ptr %3 to ptr
+ store float %5, ptr %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11
11:
- %12 = load <16 x i8>, <16 x i8>* %3, align 8
+ %12 = load <16 x i8>, ptr %3, align 8
br label %15
13:
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
br label %14
14:
@@ -2111,7 +2111,7 @@ define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i
br i1 %21, label %13, label %15
}
-define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
@@ -2156,8 +2156,8 @@ define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8
%7 = bitcast <16 x i8> %2 to <4 x float>
%8 = insertelement <4 x float> %7, float %1, i64 0
%9 = bitcast <4 x float> %8 to <16 x i8>
- %10 = bitcast <16 x i8>* %3 to <4 x float>*
- %11 = bitcast <16 x i8>* %3 to float*
+ %10 = bitcast ptr %3 to ptr
+ %11 = bitcast ptr %3 to ptr
br label %13
12:
@@ -2165,19 +2165,19 @@ define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8
13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
- %15 = load <4 x float>, <4 x float>* %10, align 8
+ %15 = load <4 x float>, ptr %10, align 8
%16 = insertelement <4 x float> %15, float %1, i64 0
%17 = bitcast <4 x float> %16 to <16 x i8>
- store float %1, float* %11, align 8
+ store float %1, ptr %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
-define arm_aapcs_vfpcc void @aesd_zero(<16 x i8>* %0) nounwind {
+define arm_aapcs_vfpcc void @aesd_zero(ptr %0) nounwind {
; CHECK-FIX-LABEL: aesd_zero:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
@@ -2186,14 +2186,14 @@ define arm_aapcs_vfpcc void @aesd_zero(<16 x i8>* %0) nounwind {
; CHECK-FIX-NEXT: aesimc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
- %2 = load <16 x i8>, <16 x i8>* %0, align 8
+ %2 = load <16 x i8>, ptr %0, align 8
%3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> zeroinitializer, <16 x i8> %2)
%4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
- store <16 x i8> %4, <16 x i8>* %0, align 8
+ store <16 x i8> %4, ptr %0, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_via_call1(<16 x i8>* %0) nounwind {
+define arm_aapcs_vfpcc void @aesd_via_call1(ptr %0) nounwind {
; CHECK-FIX-LABEL: aesd_via_call1:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
@@ -2207,14 +2207,14 @@ define arm_aapcs_vfpcc void @aesd_via_call1(<16 x i8>* %0) nounwind {
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
- %3 = load <16 x i8>, <16 x i8>* %0, align 8
+ %3 = load <16 x i8>, ptr %0, align 8
%4 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %2, <16 x i8> %3)
%5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %4)
- store <16 x i8> %5, <16 x i8>* %0, align 8
+ store <16 x i8> %5, ptr %0, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_via_call2(half %0, <16 x i8>* %1) nounwind {
+define arm_aapcs_vfpcc void @aesd_via_call2(half %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aesd_via_call2:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
@@ -2228,14 +2228,14 @@ define arm_aapcs_vfpcc void @aesd_via_call2(half %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
- %4 = load <16 x i8>, <16 x i8>* %1, align 8
+ %4 = load <16 x i8>, ptr %1, align 8
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
- store <16 x i8> %6, <16 x i8>* %1, align 8
+ store <16 x i8> %6, ptr %1, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_via_call3(float %0, <16 x i8>* %1) nounwind {
+define arm_aapcs_vfpcc void @aesd_via_call3(float %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aesd_via_call3:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
@@ -2249,14 +2249,14 @@ define arm_aapcs_vfpcc void @aesd_via_call3(float %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
- %4 = load <16 x i8>, <16 x i8>* %1, align 8
+ %4 = load <16 x i8>, ptr %1, align 8
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
- store <16 x i8> %6, <16 x i8>* %1, align 8
+ store <16 x i8> %6, ptr %1, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
+define arm_aapcs_vfpcc void @aesd_once_via_ptr(ptr %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aesd_once_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
@@ -2265,11 +2265,11 @@ define arm_aapcs_vfpcc void @aesd_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nou
; CHECK-FIX-NEXT: aesimc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %3 = load <16 x i8>, <16 x i8>* %1, align 8
- %4 = load <16 x i8>, <16 x i8>* %0, align 8
+ %3 = load <16 x i8>, ptr %1, align 8
+ %4 = load <16 x i8>, ptr %0, align 8
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
- store <16 x i8> %6, <16 x i8>* %1, align 8
+ store <16 x i8> %6, ptr %1, align 8
ret void
}
@@ -2286,7 +2286,7 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %4
}
-define arm_aapcs_vfpcc void @aesd_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
+define arm_aapcs_vfpcc void @aesd_twice_via_ptr(ptr %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aesd_twice_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
@@ -2299,15 +2299,15 @@ define arm_aapcs_vfpcc void @aesd_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) no
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %3 = load <16 x i8>, <16 x i8>* %1, align 8
- %4 = load <16 x i8>, <16 x i8>* %0, align 8
+ %3 = load <16 x i8>, ptr %1, align 8
+ %4 = load <16 x i8>, ptr %0, align 8
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
- store <16 x i8> %6, <16 x i8>* %1, align 8
- %7 = load <16 x i8>, <16 x i8>* %0, align 8
+ store <16 x i8> %6, ptr %1, align 8
+ %7 = load <16 x i8>, ptr %0, align 8
%8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
%9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
- store <16 x i8> %9, <16 x i8>* %1, align 8
+ store <16 x i8> %9, ptr %1, align 8
ret void
}
@@ -2329,7 +2329,7 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %6
}
-define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, ptr %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
@@ -2367,11 +2367,11 @@ define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>*
6:
%7 = phi i32 [ %12, %6 ], [ 0, %3 ]
- %8 = load <16 x i8>, <16 x i8>* %2, align 8
- %9 = load <16 x i8>, <16 x i8>* %1, align 8
+ %8 = load <16 x i8>, ptr %2, align 8
+ %9 = load <16 x i8>, ptr %1, align 8
%10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %9)
%11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
- store <16 x i8> %11, <16 x i8>* %2, align 8
+ store <16 x i8> %11, ptr %2, align 8
%12 = add nuw i32 %7, 1
%13 = icmp eq i32 %12, %0
br i1 %13, label %5, label %6
@@ -2409,7 +2409,7 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x
br i1 %13, label %5, label %7
}
-define arm_aapcs_vfpcc void @aesd_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_set8_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set8_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -2433,17 +2433,17 @@ define arm_aapcs_vfpcc void @aesd_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>*
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %4 = load i8, i8* %0, align 1
- %5 = load <16 x i8>, <16 x i8>* %2, align 8
+ %4 = load i8, ptr %0, align 1
+ %5 = load <16 x i8>, ptr %2, align 8
%6 = insertelement <16 x i8> %5, i8 %4, i64 0
%7 = insertelement <16 x i8> %1, i8 %4, i64 0
%8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
%9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
- store <16 x i8> %9, <16 x i8>* %2, align 8
+ store <16 x i8> %9, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aesd_set8_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2454,16 +2454,16 @@ define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %4 = load <16 x i8>, <16 x i8>* %2, align 8
+ %4 = load <16 x i8>, ptr %2, align 8
%5 = insertelement <16 x i8> %4, i8 %0, i64 0
%6 = insertelement <16 x i8> %1, i8 %0, i64 0
%7 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %5, <16 x i8> %6)
%8 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %7)
- store <16 x i8> %8, <16 x i8>* %2, align 8
+ store <16 x i8> %8, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2489,13 +2489,13 @@ define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x
br i1 %0, label %5, label %9
5:
- %6 = load i8, i8* %1, align 1
- %7 = load <16 x i8>, <16 x i8>* %3, align 8
+ %6 = load i8, ptr %1, align 1
+ %7 = load <16 x i8>, ptr %3, align 8
%8 = insertelement <16 x i8> %7, i8 %6, i64 0
br label %11
9:
- %10 = load <16 x i8>, <16 x i8>* %3, align 8
+ %10 = load <16 x i8>, ptr %3, align 8
br label %11
11:
@@ -2503,7 +2503,7 @@ define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x
br i1 %0, label %13, label %16
13:
- %14 = load i8, i8* %1, align 1
+ %14 = load i8, ptr %1, align 1
%15 = insertelement <16 x i8> %2, i8 %14, i64 0
br label %16
@@ -2511,11 +2511,11 @@ define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x
%17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %17)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2534,18 +2534,18 @@ define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = load <16 x i8>, <16 x i8>* %3, align 8
+ %5 = load <16 x i8>, ptr %3, align 8
%6 = insertelement <16 x i8> %5, i8 %1, i64 0
%7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
%8 = insertelement <16 x i8> %2, i8 %1, i64 0
%9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
%10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %9)
%11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
- store <16 x i8> %11, <16 x i8>* %3, align 8
+ store <16 x i8> %11, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2564,19 +2564,19 @@ define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = load i8, i8* %1, align 1
+ %5 = load i8, ptr %1, align 1
%6 = insertelement <16 x i8> %2, i8 %5, i64 0
- %7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0
- store i8 %5, i8* %7, align 8
+ %7 = getelementptr inbounds <16 x i8>, ptr %3, i32 0, i32 0
+ store i8 %5, ptr %7, align 8
%8 = icmp eq i32 %0, 0
br i1 %8, label %12, label %9
9:
- %10 = load <16 x i8>, <16 x i8>* %3, align 8
+ %10 = load <16 x i8>, ptr %3, align 8
br label %13
11:
- store <16 x i8> %17, <16 x i8>* %3, align 8
+ store <16 x i8> %17, ptr %3, align 8
br label %12
12:
@@ -2592,7 +2592,7 @@ define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2
br i1 %19, label %11, label %13
}
-define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2615,11 +2615,11 @@ define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x
6:
%7 = insertelement <16 x i8> %2, i8 %1, i64 0
- %8 = load <16 x i8>, <16 x i8>* %3, align 8
+ %8 = load <16 x i8>, ptr %3, align 8
br label %11
9:
- store <16 x i8> %16, <16 x i8>* %3, align 8
+ store <16 x i8> %16, ptr %3, align 8
br label %10
10:
@@ -2636,7 +2636,7 @@ define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x
br i1 %18, label %9, label %11
}
-define arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_set16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -2660,9 +2660,9 @@ define arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %4 = load i16, i16* %0, align 2
- %5 = bitcast <16 x i8>* %2 to <8 x i16>*
- %6 = load <8 x i16>, <8 x i16>* %5, align 8
+ %4 = load i16, ptr %0, align 2
+ %5 = bitcast ptr %2 to ptr
+ %6 = load <8 x i16>, ptr %5, align 8
%7 = insertelement <8 x i16> %6, i16 %4, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <8 x i16>
@@ -2670,11 +2670,11 @@ define arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>
%11 = bitcast <8 x i16> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
- store <16 x i8> %13, <16 x i8>* %2, align 8
+ store <16 x i8> %13, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aesd_set16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2685,8 +2685,8 @@ define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %4 = bitcast <16 x i8>* %2 to <8 x i16>*
- %5 = load <8 x i16>, <8 x i16>* %4, align 8
+ %4 = bitcast ptr %2 to ptr
+ %5 = load <8 x i16>, ptr %4, align 8
%6 = insertelement <8 x i16> %5, i16 %0, i64 0
%7 = bitcast <8 x i16> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <8 x i16>
@@ -2694,11 +2694,11 @@ define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <1
%10 = bitcast <8 x i16> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
- store <16 x i8> %12, <16 x i8>* %2, align 8
+ store <16 x i8> %12, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2724,15 +2724,15 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16
br i1 %0, label %5, label %10
5:
- %6 = load i16, i16* %1, align 2
- %7 = bitcast <16 x i8>* %3 to <8 x i16>*
- %8 = load <8 x i16>, <8 x i16>* %7, align 8
+ %6 = load i16, ptr %1, align 2
+ %7 = bitcast ptr %3 to ptr
+ %8 = load <8 x i16>, ptr %7, align 8
%9 = insertelement <8 x i16> %8, i16 %6, i64 0
br label %13
10:
- %11 = bitcast <16 x i8>* %3 to <8 x i16>*
- %12 = load <8 x i16>, <8 x i16>* %11, align 8
+ %11 = bitcast ptr %3 to ptr
+ %12 = load <8 x i16>, ptr %11, align 8
br label %13
13:
@@ -2740,7 +2740,7 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16
br i1 %0, label %15, label %19
15:
- %16 = load i16, i16* %1, align 2
+ %16 = load i16, ptr %1, align 2
%17 = bitcast <16 x i8> %2 to <8 x i16>
%18 = insertelement <8 x i16> %17, i16 %16, i64 0
br label %21
@@ -2755,11 +2755,11 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16
%24 = bitcast <8 x i16> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
- store <16 x i8> %26, <16 x i8>* %3, align 8
+ store <16 x i8> %26, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2778,8 +2778,8 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = bitcast <16 x i8>* %3 to <8 x i16>*
- %6 = load <8 x i16>, <8 x i16>* %5, align 8
+ %5 = bitcast ptr %3 to ptr
+ %6 = load <8 x i16>, ptr %5, align 8
%7 = insertelement <8 x i16> %6, i16 %1, i64 0
%8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
%9 = bitcast <16 x i8> %2 to <8 x i16>
@@ -2789,11 +2789,11 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext
%13 = bitcast <8 x i16> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
- store <16 x i8> %15, <16 x i8>* %3, align 8
+ store <16 x i8> %15, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2812,21 +2812,21 @@ define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8>
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = load i16, i16* %1, align 2
+ %5 = load i16, ptr %1, align 2
%6 = bitcast <16 x i8> %2 to <8 x i16>
%7 = insertelement <8 x i16> %6, i16 %5, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
- %9 = bitcast <16 x i8>* %3 to i16*
- store i16 %5, i16* %9, align 8
+ %9 = bitcast ptr %3 to ptr
+ store i16 %5, ptr %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11
11:
- %12 = load <16 x i8>, <16 x i8>* %3, align 8
+ %12 = load <16 x i8>, ptr %3, align 8
br label %15
13:
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
br label %14
14:
@@ -2842,7 +2842,7 @@ define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8>
br i1 %21, label %13, label %15
}
-define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2867,8 +2867,8 @@ define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = insertelement <8 x i16> %7, i16 %1, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
- %10 = bitcast <16 x i8>* %3 to <8 x i16>*
- %11 = bitcast <16 x i8>* %3 to i16*
+ %10 = bitcast ptr %3 to ptr
+ %11 = bitcast ptr %3 to ptr
br label %13
12:
@@ -2876,19 +2876,19 @@ define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16
13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
- %15 = load <8 x i16>, <8 x i16>* %10, align 8
+ %15 = load <8 x i16>, ptr %10, align 8
%16 = insertelement <8 x i16> %15, i16 %1, i64 0
%17 = bitcast <8 x i16> %16 to <16 x i8>
- store i16 %1, i16* %11, align 8
+ store i16 %1, ptr %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
-define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_set32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set32_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -2912,9 +2912,9 @@ define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %4 = load i32, i32* %0, align 4
- %5 = bitcast <16 x i8>* %2 to <4 x i32>*
- %6 = load <4 x i32>, <4 x i32>* %5, align 8
+ %4 = load i32, ptr %0, align 4
+ %5 = bitcast ptr %2 to ptr
+ %6 = load <4 x i32>, ptr %5, align 8
%7 = insertelement <4 x i32> %6, i32 %4, i64 0
%8 = bitcast <4 x i32> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <4 x i32>
@@ -2922,11 +2922,11 @@ define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>
%11 = bitcast <4 x i32> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
- store <16 x i8> %13, <16 x i8>* %2, align 8
+ store <16 x i8> %13, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aesd_set32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2937,8 +2937,8 @@ define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>*
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %4 = bitcast <16 x i8>* %2 to <4 x i32>*
- %5 = load <4 x i32>, <4 x i32>* %4, align 8
+ %4 = bitcast ptr %2 to ptr
+ %5 = load <4 x i32>, ptr %4, align 8
%6 = insertelement <4 x i32> %5, i32 %0, i64 0
%7 = bitcast <4 x i32> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <4 x i32>
@@ -2946,11 +2946,11 @@ define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>*
%10 = bitcast <4 x i32> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
- store <16 x i8> %12, <16 x i8>* %2, align 8
+ store <16 x i8> %12, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -2976,15 +2976,15 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16
br i1 %0, label %5, label %10
5:
- %6 = load i32, i32* %1, align 4
- %7 = bitcast <16 x i8>* %3 to <4 x i32>*
- %8 = load <4 x i32>, <4 x i32>* %7, align 8
+ %6 = load i32, ptr %1, align 4
+ %7 = bitcast ptr %3 to ptr
+ %8 = load <4 x i32>, ptr %7, align 8
%9 = insertelement <4 x i32> %8, i32 %6, i64 0
br label %13
10:
- %11 = bitcast <16 x i8>* %3 to <4 x i32>*
- %12 = load <4 x i32>, <4 x i32>* %11, align 8
+ %11 = bitcast ptr %3 to ptr
+ %12 = load <4 x i32>, ptr %11, align 8
br label %13
13:
@@ -2992,7 +2992,7 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16
br i1 %0, label %15, label %19
15:
- %16 = load i32, i32* %1, align 4
+ %16 = load i32, ptr %1, align 4
%17 = bitcast <16 x i8> %2 to <4 x i32>
%18 = insertelement <4 x i32> %17, i32 %16, i64 0
br label %21
@@ -3007,11 +3007,11 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16
%24 = bitcast <4 x i32> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
- store <16 x i8> %26, <16 x i8>* %3, align 8
+ store <16 x i8> %26, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -3030,8 +3030,8 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = bitcast <16 x i8>* %3 to <4 x i32>*
- %6 = load <4 x i32>, <4 x i32>* %5, align 8
+ %5 = bitcast ptr %3 to ptr
+ %6 = load <4 x i32>, ptr %5, align 8
%7 = insertelement <4 x i32> %6, i32 %1, i64 0
%8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
%9 = bitcast <16 x i8> %2 to <4 x i32>
@@ -3041,11 +3041,11 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16
%13 = bitcast <4 x i32> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
- store <16 x i8> %15, <16 x i8>* %3, align 8
+ store <16 x i8> %15, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -3064,21 +3064,21 @@ define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8>
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = load i32, i32* %1, align 4
+ %5 = load i32, ptr %1, align 4
%6 = bitcast <16 x i8> %2 to <4 x i32>
%7 = insertelement <4 x i32> %6, i32 %5, i64 0
%8 = bitcast <4 x i32> %7 to <16 x i8>
- %9 = bitcast <16 x i8>* %3 to i32*
- store i32 %5, i32* %9, align 8
+ %9 = bitcast ptr %3 to ptr
+ store i32 %5, ptr %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11
11:
- %12 = load <16 x i8>, <16 x i8>* %3, align 8
+ %12 = load <16 x i8>, ptr %3, align 8
br label %15
13:
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
br label %14
14:
@@ -3094,7 +3094,7 @@ define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8>
br i1 %21, label %13, label %15
}
-define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -3119,8 +3119,8 @@ define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %
%7 = bitcast <16 x i8> %2 to <4 x i32>
%8 = insertelement <4 x i32> %7, i32 %1, i64 0
%9 = bitcast <4 x i32> %8 to <16 x i8>
- %10 = bitcast <16 x i8>* %3 to <4 x i32>*
- %11 = bitcast <16 x i8>* %3 to i32*
+ %10 = bitcast ptr %3 to ptr
+ %11 = bitcast ptr %3 to ptr
br label %13
12:
@@ -3128,19 +3128,19 @@ define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %
13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
- %15 = load <4 x i32>, <4 x i32>* %10, align 8
+ %15 = load <4 x i32>, ptr %10, align 8
%16 = insertelement <4 x i32> %15, i32 %1, i64 0
%17 = bitcast <4 x i32> %16 to <16 x i8>
- store i32 %1, i32* %11, align 8
+ store i32 %1, ptr %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
-define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_set64_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -3162,9 +3162,9 @@ define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %4 = load i64, i64* %0, align 8
- %5 = bitcast <16 x i8>* %2 to <2 x i64>*
- %6 = load <2 x i64>, <2 x i64>* %5, align 8
+ %4 = load i64, ptr %0, align 8
+ %5 = bitcast ptr %2 to ptr
+ %6 = load <2 x i64>, ptr %5, align 8
%7 = insertelement <2 x i64> %6, i64 %4, i64 0
%8 = bitcast <2 x i64> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <2 x i64>
@@ -3172,11 +3172,11 @@ define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>
%11 = bitcast <2 x i64> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
- store <16 x i8> %13, <16 x i8>* %2, align 8
+ store <16 x i8> %13, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aesd_set64_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -3189,8 +3189,8 @@ define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>*
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %4 = bitcast <16 x i8>* %2 to <2 x i64>*
- %5 = load <2 x i64>, <2 x i64>* %4, align 8
+ %4 = bitcast ptr %2 to ptr
+ %5 = load <2 x i64>, ptr %4, align 8
%6 = insertelement <2 x i64> %5, i64 %0, i64 0
%7 = bitcast <2 x i64> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <2 x i64>
@@ -3198,11 +3198,11 @@ define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>*
%10 = bitcast <2 x i64> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
- store <16 x i8> %12, <16 x i8>* %2, align 8
+ store <16 x i8> %12, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
@@ -3244,15 +3244,15 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16
br i1 %0, label %5, label %10
5:
- %6 = load i64, i64* %1, align 8
- %7 = bitcast <16 x i8>* %3 to <2 x i64>*
- %8 = load <2 x i64>, <2 x i64>* %7, align 8
+ %6 = load i64, ptr %1, align 8
+ %7 = bitcast ptr %3 to ptr
+ %8 = load <2 x i64>, ptr %7, align 8
%9 = insertelement <2 x i64> %8, i64 %6, i64 0
br label %13
10:
- %11 = bitcast <16 x i8>* %3 to <2 x i64>*
- %12 = load <2 x i64>, <2 x i64>* %11, align 8
+ %11 = bitcast ptr %3 to ptr
+ %12 = load <2 x i64>, ptr %11, align 8
br label %13
13:
@@ -3260,7 +3260,7 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16
br i1 %0, label %15, label %19
15:
- %16 = load i64, i64* %1, align 8
+ %16 = load i64, ptr %1, align 8
%17 = bitcast <16 x i8> %2 to <2 x i64>
%18 = insertelement <2 x i64> %17, i64 %16, i64 0
br label %21
@@ -3275,11 +3275,11 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16
%24 = bitcast <2 x i64> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
- store <16 x i8> %26, <16 x i8>* %3, align 8
+ store <16 x i8> %26, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set64_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -3301,8 +3301,8 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %5 = bitcast <16 x i8>* %3 to <2 x i64>*
- %6 = load <2 x i64>, <2 x i64>* %5, align 8
+ %5 = bitcast ptr %3 to ptr
+ %6 = load <2 x i64>, ptr %5, align 8
%7 = insertelement <2 x i64> %6, i64 %1, i64 0
%8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
%9 = bitcast <16 x i8> %2 to <2 x i64>
@@ -3312,11 +3312,11 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16
%13 = bitcast <2 x i64> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
- store <16 x i8> %15, <16 x i8>* %3, align 8
+ store <16 x i8> %15, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -3359,21 +3359,21 @@ define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8>
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc}
- %5 = load i64, i64* %1, align 8
+ %5 = load i64, ptr %1, align 8
%6 = bitcast <16 x i8> %2 to <2 x i64>
%7 = insertelement <2 x i64> %6, i64 %5, i64 0
%8 = bitcast <2 x i64> %7 to <16 x i8>
- %9 = bitcast <16 x i8>* %3 to i64*
- store i64 %5, i64* %9, align 8
+ %9 = bitcast ptr %3 to ptr
+ store i64 %5, ptr %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11
11:
- %12 = load <16 x i8>, <16 x i8>* %3, align 8
+ %12 = load <16 x i8>, ptr %3, align 8
br label %15
13:
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
br label %14
14:
@@ -3389,7 +3389,7 @@ define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8>
br i1 %21, label %13, label %15
}
-define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set64_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -3417,8 +3417,8 @@ define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %
%7 = bitcast <16 x i8> %2 to <2 x i64>
%8 = insertelement <2 x i64> %7, i64 %1, i64 0
%9 = bitcast <2 x i64> %8 to <16 x i8>
- %10 = bitcast <16 x i8>* %3 to <2 x i64>*
- %11 = bitcast <16 x i8>* %3 to i64*
+ %10 = bitcast ptr %3 to ptr
+ %11 = bitcast ptr %3 to ptr
br label %13
12:
@@ -3426,19 +3426,19 @@ define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %
13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
- %15 = load <2 x i64>, <2 x i64>* %10, align 8
+ %15 = load <2 x i64>, ptr %10, align 8
%16 = insertelement <2 x i64> %15, i64 %1, i64 0
%17 = bitcast <2 x i64> %16 to <16 x i8>
- store i64 %1, i64* %11, align 8
+ store i64 %1, ptr %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
-define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
@@ -3462,10 +3462,10 @@ define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %4 = bitcast half* %0 to i16*
- %5 = load i16, i16* %4, align 2
- %6 = bitcast <16 x i8>* %2 to <8 x i16>*
- %7 = load <8 x i16>, <8 x i16>* %6, align 8
+ %4 = bitcast ptr %0 to ptr
+ %5 = load i16, ptr %4, align 2
+ %6 = bitcast ptr %2 to ptr
+ %7 = load <8 x i16>, ptr %6, align 8
%8 = insertelement <8 x i16> %7, i16 %5, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
%10 = bitcast <16 x i8> %1 to <8 x i16>
@@ -3473,11 +3473,11 @@ define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i
%12 = bitcast <8 x i16> %11 to <16 x i8>
%13 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %12)
%14 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %13)
- store <16 x i8> %14, <16 x i8>* %2, align 8
+ store <16 x i8> %14, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
@@ -3489,8 +3489,8 @@ define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
- %4 = bitcast <16 x i8>* %2 to <8 x i16>*
- %5 = load <8 x i16>, <8 x i16>* %4, align 8
+ %4 = bitcast ptr %2 to ptr
+ %5 = load <8 x i16>, ptr %4, align 8
%6 = bitcast half %0 to i16
%7 = insertelement <8 x i16> %5, i16 %6, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
@@ -3499,11 +3499,11 @@ define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8
%11 = bitcast <8 x i16> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
- store <16 x i8> %13, <16 x i8>* %2, align 8
+ store <16 x i8> %13, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -3685,17 +3685,17 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
br i1 %0, label %5, label %12
5:
- %6 = bitcast half* %1 to i16*
- %7 = load i16, i16* %6, align 2
- %8 = bitcast <16 x i8>* %3 to <8 x i16>*
- %9 = load <8 x i16>, <8 x i16>* %8, align 8
+ %6 = bitcast ptr %1 to ptr
+ %7 = load i16, ptr %6, align 2
+ %8 = bitcast ptr %3 to ptr
+ %9 = load <8 x i16>, ptr %8, align 8
%10 = insertelement <8 x i16> %9, i16 %7, i64 0
%11 = bitcast <8 x i16> %10 to <8 x half>
br label %15
12:
- %13 = bitcast <16 x i8>* %3 to <8 x half>*
- %14 = load <8 x half>, <8 x half>* %13, align 8
+ %13 = bitcast ptr %3 to ptr
+ %14 = load <8 x half>, ptr %13, align 8
br label %15
15:
@@ -3703,8 +3703,8 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
br i1 %0, label %17, label %23
17:
- %18 = bitcast half* %1 to i16*
- %19 = load i16, i16* %18, align 2
+ %18 = bitcast ptr %1 to ptr
+ %19 = load i16, ptr %18, align 2
%20 = bitcast <16 x i8> %2 to <8 x i16>
%21 = insertelement <8 x i16> %20, i16 %19, i64 0
%22 = bitcast <8 x i16> %21 to <8 x half>
@@ -3720,11 +3720,11 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
%28 = bitcast <8 x half> %26 to <16 x i8>
%29 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %27, <16 x i8> %28)
%30 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %29)
- store <16 x i8> %30, <16 x i8>* %3, align 8
+ store <16 x i8> %30, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -3906,16 +3906,16 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <1
br i1 %0, label %5, label %11
5:
- %6 = bitcast <16 x i8>* %3 to <8 x i16>*
- %7 = load <8 x i16>, <8 x i16>* %6, align 8
+ %6 = bitcast ptr %3 to ptr
+ %7 = load <8 x i16>, ptr %6, align 8
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <8 x half>
br label %14
11:
- %12 = bitcast <16 x i8>* %3 to <8 x half>*
- %13 = load <8 x half>, <8 x half>* %12, align 8
+ %12 = bitcast ptr %3 to ptr
+ %13 = load <8 x half>, ptr %12, align 8
br label %14
14:
@@ -3939,11 +3939,11 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <1
%26 = bitcast <8 x half> %24 to <16 x i8>
%27 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %25, <16 x i8> %26)
%28 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %27)
- store <16 x i8> %28, <16 x i8>* %3, align 8
+ store <16 x i8> %28, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -3962,22 +3962,22 @@ define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
- %5 = bitcast half* %1 to i16*
- %6 = load i16, i16* %5, align 2
+ %5 = bitcast ptr %1 to ptr
+ %6 = load i16, ptr %5, align 2
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = insertelement <8 x i16> %7, i16 %6, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
- %10 = bitcast <16 x i8>* %3 to i16*
- store i16 %6, i16* %10, align 8
+ %10 = bitcast ptr %3 to ptr
+ store i16 %6, ptr %10, align 8
%11 = icmp eq i32 %0, 0
br i1 %11, label %15, label %12
12:
- %13 = load <16 x i8>, <16 x i8>* %3, align 8
+ %13 = load <16 x i8>, ptr %3, align 8
br label %16
14:
- store <16 x i8> %20, <16 x i8>* %3, align 8
+ store <16 x i8> %20, ptr %3, align 8
br label %15
15:
@@ -3993,7 +3993,7 @@ define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8
br i1 %22, label %14, label %16
}
-define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
@@ -4020,8 +4020,8 @@ define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8>
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <16 x i8>
- %11 = bitcast <16 x i8>* %3 to <8 x i16>*
- %12 = bitcast <16 x i8>* %3 to half*
+ %11 = bitcast ptr %3 to ptr
+ %12 = bitcast ptr %3 to ptr
br label %14
13:
@@ -4029,19 +4029,19 @@ define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8>
14:
%15 = phi i32 [ 0, %6 ], [ %21, %14 ]
- %16 = load <8 x i16>, <8 x i16>* %11, align 8
+ %16 = load <8 x i16>, ptr %11, align 8
%17 = insertelement <8 x i16> %16, i16 %8, i64 0
%18 = bitcast <8 x i16> %17 to <16 x i8>
- store half %1, half* %12, align 8
+ store half %1, ptr %12, align 8
%19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %18, <16 x i8> %10)
%20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
- store <16 x i8> %20, <16 x i8>* %3, align 8
+ store <16 x i8> %20, ptr %3, align 8
%21 = add nuw i32 %15, 1
%22 = icmp eq i32 %21, %0
br i1 %22, label %13, label %14
}
-define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vldr s0, [r0]
@@ -4053,9 +4053,9 @@ define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x
; CHECK-FIX-NEXT: aesimc.8 q8, q1
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
- %4 = load float, float* %0, align 4
- %5 = bitcast <16 x i8>* %2 to <4 x float>*
- %6 = load <4 x float>, <4 x float>* %5, align 8
+ %4 = load float, ptr %0, align 4
+ %5 = bitcast ptr %2 to ptr
+ %6 = load <4 x float>, ptr %5, align 8
%7 = insertelement <4 x float> %6, float %4, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <4 x float>
@@ -4063,11 +4063,11 @@ define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x
%11 = bitcast <4 x float> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
- store <16 x i8> %13, <16 x i8>* %2, align 8
+ store <16 x i8> %13, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vmov.f32 s4, s0
@@ -4079,8 +4079,8 @@ define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, <16 x i
; CHECK-FIX-NEXT: aesimc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
- %4 = bitcast <16 x i8>* %2 to <4 x float>*
- %5 = load <4 x float>, <4 x float>* %4, align 8
+ %4 = bitcast ptr %2 to ptr
+ %5 = load <4 x float>, ptr %4, align 8
%6 = insertelement <4 x float> %5, float %0, i64 0
%7 = bitcast <4 x float> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <4 x float>
@@ -4088,11 +4088,11 @@ define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, <16 x i
%10 = bitcast <4 x float> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
- store <16 x i8> %12, <16 x i8>* %2, align 8
+ store <16 x i8> %12, ptr %2, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
@@ -4118,15 +4118,15 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1,
br i1 %0, label %5, label %10
5:
- %6 = load float, float* %1, align 4
- %7 = bitcast <16 x i8>* %3 to <4 x float>*
- %8 = load <4 x float>, <4 x float>* %7, align 8
+ %6 = load float, ptr %1, align 4
+ %7 = bitcast ptr %3 to ptr
+ %8 = load <4 x float>, ptr %7, align 8
%9 = insertelement <4 x float> %8, float %6, i64 0
br label %13
10:
- %11 = bitcast <16 x i8>* %3 to <4 x float>*
- %12 = load <4 x float>, <4 x float>* %11, align 8
+ %11 = bitcast ptr %3 to ptr
+ %12 = load <4 x float>, ptr %11, align 8
br label %13
13:
@@ -4134,7 +4134,7 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1,
br i1 %0, label %15, label %19
15:
- %16 = load float, float* %1, align 4
+ %16 = load float, ptr %1, align 4
%17 = bitcast <16 x i8> %2 to <4 x float>
%18 = insertelement <4 x float> %17, float %16, i64 0
br label %21
@@ -4149,11 +4149,11 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1,
%24 = bitcast <4 x float> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
- store <16 x i8> %26, <16 x i8>* %3, align 8
+ store <16 x i8> %26, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_cond_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
@@ -4181,8 +4181,8 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %5 = bitcast <16 x i8>* %3 to <4 x float>*
- %6 = load <4 x float>, <4 x float>* %5, align 8
+ %5 = bitcast ptr %3 to ptr
+ %6 = load <4 x float>, ptr %5, align 8
%7 = insertelement <4 x float> %6, float %1, i64 0
%8 = select i1 %0, <4 x float> %7, <4 x float> %6
%9 = bitcast <16 x i8> %2 to <4 x float>
@@ -4192,11 +4192,11 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <
%13 = bitcast <4 x float> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
- store <16 x i8> %15, <16 x i8>* %3, align 8
+ store <16 x i8> %15, ptr %3, align 8
ret void
}
-define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1]
@@ -4234,21 +4234,21 @@ define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bx lr
- %5 = load float, float* %1, align 4
+ %5 = load float, ptr %1, align 4
%6 = bitcast <16 x i8> %2 to <4 x float>
%7 = insertelement <4 x float> %6, float %5, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
- %9 = bitcast <16 x i8>* %3 to float*
- store float %5, float* %9, align 8
+ %9 = bitcast ptr %3 to ptr
+ store float %5, ptr %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11
11:
- %12 = load <16 x i8>, <16 x i8>* %3, align 8
+ %12 = load <16 x i8>, ptr %3, align 8
br label %15
13:
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
br label %14
14:
@@ -4264,7 +4264,7 @@ define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i
br i1 %21, label %13, label %15
}
-define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
+define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
@@ -4309,8 +4309,8 @@ define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8
%7 = bitcast <16 x i8> %2 to <4 x float>
%8 = insertelement <4 x float> %7, float %1, i64 0
%9 = bitcast <4 x float> %8 to <16 x i8>
- %10 = bitcast <16 x i8>* %3 to <4 x float>*
- %11 = bitcast <16 x i8>* %3 to float*
+ %10 = bitcast ptr %3 to ptr
+ %11 = bitcast ptr %3 to ptr
br label %13
12:
@@ -4318,19 +4318,19 @@ define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8
13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
- %15 = load <4 x float>, <4 x float>* %10, align 8
+ %15 = load <4 x float>, ptr %10, align 8
%16 = insertelement <4 x float> %15, float %1, i64 0
%17 = bitcast <4 x float> %16 to <16 x i8>
- store float %1, float* %11, align 8
+ store float %1, ptr %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
- store <16 x i8> %19, <16 x i8>* %3, align 8
+ store <16 x i8> %19, ptr %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
-define arm_aapcs_vfpcc void @aese_constantisland(<16 x i8>* %0) nounwind {
+define arm_aapcs_vfpcc void @aese_constantisland(ptr %0) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_constantisland:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r0]
@@ -4388,9 +4388,9 @@ define arm_aapcs_vfpcc void @aese_constantisland(<16 x i8>* %0) nounwind {
; CHECK-CORTEX-FIX-NEXT: .byte 13 @ 0xd
; CHECK-CORTEX-FIX-NEXT: .byte 14 @ 0xe
; CHECK-CORTEX-FIX-NEXT: .byte 15 @ 0xf
- %2 = load <16 x i8>, <16 x i8>* %0, align 8
+ %2 = load <16 x i8>, ptr %0, align 8
%3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %2)
%4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
- store <16 x i8> %4, <16 x i8>* %0, align 8
+ store <16 x i8> %4, ptr %0, align 8
ret void
}
diff --git a/llvm/test/CodeGen/ARM/aliases.ll b/llvm/test/CodeGen/ARM/aliases.ll
index cc423afa240c4..6075ad813e990 100644
--- a/llvm/test/CodeGen/ARM/aliases.ll
+++ b/llvm/test/CodeGen/ARM/aliases.ll
@@ -48,7 +48,7 @@ define i32 @foo_f() {
@A = alias i64, ptr @bar
@structvar = private global {i32, i32} {i32 1, i32 2}
-@elem0 = alias i32, getelementptr({i32, i32}, ptr @structvar, i32 0, i32 0)
+@elem0 = alias i32, ptr @structvar
@elem1 = alias i32, getelementptr({i32, i32}, ptr @structvar, i32 0, i32 1)
define i32 @test() {
diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll
index 01d72f134aacb..c8bb10cd104cf 100644
--- a/llvm/test/CodeGen/ARM/code-placement.ll
+++ b/llvm/test/CodeGen/ARM/code-placement.ll
@@ -2,12 +2,12 @@
; PHI elimination shouldn't break backedge.
%struct.list_data_s = type { i16, i16 }
-%struct.list_head = type { %struct.list_head*, %struct.list_data_s* }
+%struct.list_head = type { ptr, ptr }
-define arm_apcscc %struct.list_head* @t1(%struct.list_head* %list) nounwind {
+define arm_apcscc ptr @t1(ptr %list) nounwind {
entry:
; CHECK-LABEL: t1:
- %0 = icmp eq %struct.list_head* %list, null
+ %0 = icmp eq ptr %list, null
br i1 %0, label %bb2, label %bb
bb:
@@ -15,21 +15,21 @@ bb:
; CHECK: bne LBB0_[[LABEL]]
; CHECK-NOT: b LBB0_[[LABEL]]
; CHECK: bx lr
- %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
- %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
- %1 = getelementptr inbounds %struct.list_head, %struct.list_head* %list_addr.05, i32 0, i32 0
- %2 = load %struct.list_head*, %struct.list_head** %1, align 4
- store %struct.list_head* %next.04, %struct.list_head** %1, align 4
- %3 = icmp eq %struct.list_head* %2, null
+ %list_addr.05 = phi ptr [ %2, %bb ], [ %list, %entry ]
+ %next.04 = phi ptr [ %list_addr.05, %bb ], [ null, %entry ]
+ %1 = getelementptr inbounds %struct.list_head, ptr %list_addr.05, i32 0, i32 0
+ %2 = load ptr, ptr %1, align 4
+ store ptr %next.04, ptr %1, align 4
+ %3 = icmp eq ptr %2, null
br i1 %3, label %bb2, label %bb
bb2:
- %next.0.lcssa = phi %struct.list_head* [ null, %entry ], [ %list_addr.05, %bb ]
- ret %struct.list_head* %next.0.lcssa
+ %next.0.lcssa = phi ptr [ null, %entry ], [ %list_addr.05, %bb ]
+ ret ptr %next.0.lcssa
}
; Optimize loop entry, eliminate intra loop branches
-define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly {
+define i32 @t2(i32 %passes, ptr nocapture %src, i32 %size) nounwind readonly {
entry:
; CHECK-LABEL: t2:
%0 = icmp eq i32 %passes, 0 ; <i1> [#uses=1]
@@ -42,8 +42,8 @@ bb1: ; preds = %bb2.preheader, %bb1
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
%sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
%tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1]
- %scevgep = getelementptr i32, i32* %src, i32 %tmp17 ; <i32*> [#uses=1]
- %1 = load i32, i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %scevgep = getelementptr i32, ptr %src, i32 %tmp17 ; <i32*> [#uses=1]
+ %1 = load i32, ptr %scevgep, align 4 ; <i32> [#uses=1]
%2 = add nsw i32 %1, %sum.08 ; <i32> [#uses=2]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %size ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/constant-island-movwt.mir b/llvm/test/CodeGen/ARM/constant-island-movwt.mir
index 2a2b4a9d65dde..7d21a4e4875c3 100644
--- a/llvm/test/CodeGen/ARM/constant-island-movwt.mir
+++ b/llvm/test/CodeGen/ARM/constant-island-movwt.mir
@@ -6,7 +6,7 @@
target datalayout = "e-m:w-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7-unknown-windows-gnu"
- %struct.A = type { [201 x i8*] }
+ %struct.A = type { [201 x ptr] }
@.str.17 = private unnamed_addr constant [10 x i8] c"__ashlhi3\00", align 1
@.str.18 = private unnamed_addr constant [10 x i8] c"__ashlsi3\00", align 1
@@ -147,166 +147,166 @@
@.str.153 = private unnamed_addr constant [9 x i8] c"copysign\00", align 1
; Function Attrs: nounwind
- define arm_aapcs_vfpcc void @func(%struct.A* %obj) #0 {
+ define arm_aapcs_vfpcc void @func(ptr %obj) #0 {
entry:
- %arrayidx.i1 = bitcast %struct.A* %obj to i8**
- %0 = bitcast i8** %arrayidx.i1 to <4 x i8*>*
- store <4 x i8*> <i8* null, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.18, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.19, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.20, i32 0, i32 0)>, <4 x i8*>* %0
- %arrayidx.i62 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 4
- %1 = bitcast i8** %arrayidx.i62 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.21, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.22, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.23, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.24, i32 0, i32 0)>, <4 x i8*>* %1
- %arrayidx.i523 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 8
- %2 = bitcast i8** %arrayidx.i523 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.25, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.26, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.27, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.28, i32 0, i32 0)>, <4 x i8*>* %2
- %arrayidx.i519 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 12
- %3 = bitcast i8** %arrayidx.i519 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.29, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.30, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.31, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.32, i32 0, i32 0)>, <4 x i8*>* %3
- %arrayidx.i515 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 16
- %4 = bitcast i8** %arrayidx.i515 to i8*
- call void @llvm.memset.p0i8.i64(i8* align 4 %4, i8 0, i64 40, i1 false)
- %arrayidx.i511 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 20
- %5 = bitcast i8** %arrayidx.i511 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.37, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.38, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.39, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.40, i32 0, i32 0)>, <4 x i8*>* %5
- %arrayidx.i507 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 24
- %6 = bitcast i8** %arrayidx.i507 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.41, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.42, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.43, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.44, i32 0, i32 0)>, <4 x i8*>* %6
- %arrayidx.i503 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 28
- %7 = bitcast i8** %arrayidx.i503 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.45, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.46, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.47, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.48, i32 0, i32 0)>, <4 x i8*>* %7
- %arrayidx.i499 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 32
- %8 = bitcast i8** %arrayidx.i499 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.49, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.50, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.51, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.52, i32 0, i32 0)>, <4 x i8*>* %8
- %arrayidx.i495 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 36
- %9 = bitcast i8** %arrayidx.i495 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.53, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.54, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.55, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.56, i32 0, i32 0)>, <4 x i8*>* %9
- %arrayidx.i491 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 40
- %arrayidx.i481 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 50
- %10 = bitcast i8** %arrayidx.i491 to i8*
- call void @llvm.memset.p0i8.i64(i8* align 4 %10, i8 0, i64 40, i1 false)
- %11 = bitcast i8** %arrayidx.i481 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.57, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.58, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.59, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.60, i32 0, i32 0)>, <4 x i8*>* %11
- %arrayidx.i477 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 54
- %12 = bitcast i8** %arrayidx.i477 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.61, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.62, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.63, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.64, i32 0, i32 0)>, <4 x i8*>* %12
- %arrayidx.i473 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 58
- %13 = bitcast i8** %arrayidx.i473 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.65, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.66, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.67, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.68, i32 0, i32 0)>, <4 x i8*>* %13
- %arrayidx.i469 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 62
- %14 = bitcast i8** %arrayidx.i469 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.69, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.70, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.71, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.72, i32 0, i32 0)>, <4 x i8*>* %14
- %arrayidx.i465 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 66
- %15 = bitcast i8** %arrayidx.i465 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.73, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.74, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.75, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.76, i32 0, i32 0)>, <4 x i8*>* %15
- %arrayidx.i461 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 70
- %16 = bitcast i8** %arrayidx.i461 to i8*
- call void @llvm.memset.p0i8.i64(i8* align 4 %16, i8 0, i64 40, i1 false)
- %arrayidx.i457 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 74
- %17 = bitcast i8** %arrayidx.i457 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.81, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.81, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.81, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.82, i32 0, i32 0)>, <4 x i8*>* %17
- %arrayidx.i453 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 78
- %18 = bitcast i8** %arrayidx.i453 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.83, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.84, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.84, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.84, i32 0, i32 0)>, <4 x i8*>* %18
- %arrayidx.i449 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 82
- %arrayidx.i445 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 86
- %19 = bitcast i8** %arrayidx.i445 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.88, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.89, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.90, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.91, i32 0, i32 0)>, <4 x i8*>* %19
- %arrayidx.i441 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 90
- %20 = bitcast i8** %arrayidx.i441 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.91, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.91, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.92, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.93, i32 0, i32 0)>, <4 x i8*>* %20
- %arrayidx.i437 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 94
- %21 = bitcast i8** %arrayidx.i437 to i8*
- call void @llvm.memset.p0i8.i64(i8* align 4 %21, i8 0, i64 28, i1 false)
- %arrayidx.i433 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 98
- %22 = bitcast i8** %arrayidx.i433 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.96, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.97, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.97, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.97, i32 0, i32 0)>, <4 x i8*>* %22
- %arrayidx.i429 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 102
- %23 = bitcast i8** %arrayidx.i429 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.98, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.99, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.100, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.100, i32 0, i32 0)>, <4 x i8*>* %23
- %arrayidx.i425 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 106
- %24 = bitcast i8** %arrayidx.i425 to i8*
- call void @llvm.memset.p0i8.i64(i8* align 4 %24, i8 0, i64 28, i1 false)
- %arrayidx.i421 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 110
- %25 = bitcast i8** %arrayidx.i421 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.103, i32 0, i32 0), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.103, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.104, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.105, i32 0, i32 0)>, <4 x i8*>* %25
- %arrayidx.i417 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 114
- %26 = bitcast i8** %arrayidx.i417 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.106, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.106, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.106, i32 0, i32 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.107, i32 0, i32 0)>, <4 x i8*>* %26
- %arrayidx.i413 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 118
- %27 = bitcast i8** %arrayidx.i413 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.108, i32 0, i32 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.109, i32 0, i32 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.109, i32 0, i32 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.109, i32 0, i32 0)>, <4 x i8*>* %27
- %arrayidx.i409 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 122
- %28 = bitcast i8** %arrayidx.i409 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.110, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.111, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.112, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.112, i32 0, i32 0)>, <4 x i8*>* %28
- %arrayidx.i405 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 126
- %29 = bitcast i8** %arrayidx.i405 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.112, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.113, i32 0, i32 0), i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.114, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.115, i32 0, i32 0)>, <4 x i8*>* %29
- %arrayidx.i401 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 130
- %30 = bitcast i8** %arrayidx.i401 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.115, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.115, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.116, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.117, i32 0, i32 0)>, <4 x i8*>* %30
- %arrayidx.i397 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 134
- %31 = bitcast i8** %arrayidx.i397 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.118, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.118, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.118, i32 0, i32 0), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.119, i32 0, i32 0)>, <4 x i8*>* %31
- %arrayidx.i393 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 138
- %32 = bitcast i8** %arrayidx.i393 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.120, i32 0, i32 0), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.121, i32 0, i32 0), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.121, i32 0, i32 0), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.121, i32 0, i32 0)>, <4 x i8*>* %32
- %arrayidx.i389 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 142
- %33 = bitcast i8** %arrayidx.i389 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.122, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.123, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.124, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.124, i32 0, i32 0)>, <4 x i8*>* %33
- %arrayidx.i385 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 146
- %34 = bitcast i8** %arrayidx.i385 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.124, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.125, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.126, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.127, i32 0, i32 0)>, <4 x i8*>* %34
- %arrayidx.i381 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 150
- store i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.127, i32 0, i32 0), i8** %arrayidx.i381
- %arrayidx.i380 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 151
- store i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.127, i32 0, i32 0), i8** %arrayidx.i380
- %arrayidx.i379 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 152
- %arrayidx.i375 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 156
- %arrayidx.i374 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 157
- %arrayidx.i373 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 158
- %arrayidx.i372 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 159
- %35 = bitcast i8** %arrayidx.i379 to i8*
- call void @llvm.memset.p0i8.i64(i8* align 4 %35, i8 0, i64 28, i1 false)
- store i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.128, i32 0, i32 0), i8** %arrayidx.i372
- %arrayidx.i371 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 160
- store i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.129, i32 0, i32 0), i8** %arrayidx.i371
- %arrayidx.i370 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 161
- %36 = bitcast i8** %arrayidx.i370 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.130, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.130, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.130, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.131, i32 0, i32 0)>, <4 x i8*>* %36
- %arrayidx.i366 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 165
- %37 = bitcast i8** %arrayidx.i366 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.132, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.133, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.133, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.133, i32 0, i32 0)>, <4 x i8*>* %37
- %arrayidx.i362 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 169
- %38 = bitcast i8** %arrayidx.i362 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.134, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.135, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.136, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.136, i32 0, i32 0)>, <4 x i8*>* %38
- %arrayidx.i358 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 173
- %39 = bitcast i8** %arrayidx.i358 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.136, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.137, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.138, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.139, i32 0, i32 0)>, <4 x i8*>* %39
- %arrayidx.i354 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 177
- %40 = bitcast i8** %arrayidx.i354 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.139, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.139, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.140, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.141, i32 0, i32 0)>, <4 x i8*>* %40
- %arrayidx.i350 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 181
- %41 = bitcast i8** %arrayidx.i350 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.142, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.142, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.142, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.143, i32 0, i32 0)>, <4 x i8*>* %41
- %arrayidx.i346 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 185
- %42 = bitcast i8** %arrayidx.i346 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.144, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.145, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.145, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.145, i32 0, i32 0)>, <4 x i8*>* %42
- %arrayidx.i342 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 189
- %43 = bitcast i8** %arrayidx.i342 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.146, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.147, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.148, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.148, i32 0, i32 0)>, <4 x i8*>* %43
- %arrayidx.i338 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 193
- %44 = bitcast i8** %arrayidx.i338 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.148, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.149, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.150, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.151, i32 0, i32 0)>, <4 x i8*>* %44
- %arrayidx.i334 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 197
- %45 = bitcast i8** %arrayidx.i334 to <4 x i8*>*
- store <4 x i8*> <i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.151, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.151, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.152, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.153, i32 0, i32 0)>, <4 x i8*>* %45
+ %arrayidx.i1 = bitcast ptr %obj to ptr
+ %0 = bitcast ptr %arrayidx.i1 to ptr
+ store <4 x ptr> <ptr null, ptr @.str.18, ptr @.str.19, ptr @.str.20>, ptr %0
+ %arrayidx.i62 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 4
+ %1 = bitcast ptr %arrayidx.i62 to ptr
+ store <4 x ptr> <ptr @.str.21, ptr @.str.22, ptr @.str.23, ptr @.str.24>, ptr %1
+ %arrayidx.i523 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 8
+ %2 = bitcast ptr %arrayidx.i523 to ptr
+ store <4 x ptr> <ptr @.str.25, ptr @.str.26, ptr @.str.27, ptr @.str.28>, ptr %2
+ %arrayidx.i519 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 12
+ %3 = bitcast ptr %arrayidx.i519 to ptr
+ store <4 x ptr> <ptr @.str.29, ptr @.str.30, ptr @.str.31, ptr @.str.32>, ptr %3
+ %arrayidx.i515 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 16
+ %4 = bitcast ptr %arrayidx.i515 to ptr
+ call void @llvm.memset.p0.i64(ptr align 4 %4, i8 0, i64 40, i1 false)
+ %arrayidx.i511 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 20
+ %5 = bitcast ptr %arrayidx.i511 to ptr
+ store <4 x ptr> <ptr @.str.37, ptr @.str.38, ptr @.str.39, ptr @.str.40>, ptr %5
+ %arrayidx.i507 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 24
+ %6 = bitcast ptr %arrayidx.i507 to ptr
+ store <4 x ptr> <ptr @.str.41, ptr @.str.42, ptr @.str.43, ptr @.str.44>, ptr %6
+ %arrayidx.i503 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 28
+ %7 = bitcast ptr %arrayidx.i503 to ptr
+ store <4 x ptr> <ptr @.str.45, ptr @.str.46, ptr @.str.47, ptr @.str.48>, ptr %7
+ %arrayidx.i499 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 32
+ %8 = bitcast ptr %arrayidx.i499 to ptr
+ store <4 x ptr> <ptr @.str.49, ptr @.str.50, ptr @.str.51, ptr @.str.52>, ptr %8
+ %arrayidx.i495 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 36
+ %9 = bitcast ptr %arrayidx.i495 to ptr
+ store <4 x ptr> <ptr @.str.53, ptr @.str.54, ptr @.str.55, ptr @.str.56>, ptr %9
+ %arrayidx.i491 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 40
+ %arrayidx.i481 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 50
+ %10 = bitcast ptr %arrayidx.i491 to ptr
+ call void @llvm.memset.p0.i64(ptr align 4 %10, i8 0, i64 40, i1 false)
+ %11 = bitcast ptr %arrayidx.i481 to ptr
+ store <4 x ptr> <ptr @.str.57, ptr @.str.58, ptr @.str.59, ptr @.str.60>, ptr %11
+ %arrayidx.i477 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 54
+ %12 = bitcast ptr %arrayidx.i477 to ptr
+ store <4 x ptr> <ptr @.str.61, ptr @.str.62, ptr @.str.63, ptr @.str.64>, ptr %12
+ %arrayidx.i473 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 58
+ %13 = bitcast ptr %arrayidx.i473 to ptr
+ store <4 x ptr> <ptr @.str.65, ptr @.str.66, ptr @.str.67, ptr @.str.68>, ptr %13
+ %arrayidx.i469 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 62
+ %14 = bitcast ptr %arrayidx.i469 to ptr
+ store <4 x ptr> <ptr @.str.69, ptr @.str.70, ptr @.str.71, ptr @.str.72>, ptr %14
+ %arrayidx.i465 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 66
+ %15 = bitcast ptr %arrayidx.i465 to ptr
+ store <4 x ptr> <ptr @.str.73, ptr @.str.74, ptr @.str.75, ptr @.str.76>, ptr %15
+ %arrayidx.i461 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 70
+ %16 = bitcast ptr %arrayidx.i461 to ptr
+ call void @llvm.memset.p0.i64(ptr align 4 %16, i8 0, i64 40, i1 false)
+ %arrayidx.i457 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 74
+ %17 = bitcast ptr %arrayidx.i457 to ptr
+ store <4 x ptr> <ptr @.str.81, ptr @.str.81, ptr @.str.81, ptr @.str.82>, ptr %17
+ %arrayidx.i453 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 78
+ %18 = bitcast ptr %arrayidx.i453 to ptr
+ store <4 x ptr> <ptr @.str.83, ptr @.str.84, ptr @.str.84, ptr @.str.84>, ptr %18
+ %arrayidx.i449 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 82
+ %arrayidx.i445 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 86
+ %19 = bitcast ptr %arrayidx.i445 to ptr
+ store <4 x ptr> <ptr @.str.88, ptr @.str.89, ptr @.str.90, ptr @.str.91>, ptr %19
+ %arrayidx.i441 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 90
+ %20 = bitcast ptr %arrayidx.i441 to ptr
+ store <4 x ptr> <ptr @.str.91, ptr @.str.91, ptr @.str.92, ptr @.str.93>, ptr %20
+ %arrayidx.i437 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 94
+ %21 = bitcast ptr %arrayidx.i437 to ptr
+ call void @llvm.memset.p0.i64(ptr align 4 %21, i8 0, i64 28, i1 false)
+ %arrayidx.i433 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 98
+ %22 = bitcast ptr %arrayidx.i433 to ptr
+ store <4 x ptr> <ptr @.str.96, ptr @.str.97, ptr @.str.97, ptr @.str.97>, ptr %22
+ %arrayidx.i429 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 102
+ %23 = bitcast ptr %arrayidx.i429 to ptr
+ store <4 x ptr> <ptr @.str.98, ptr @.str.99, ptr @.str.100, ptr @.str.100>, ptr %23
+ %arrayidx.i425 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 106
+ %24 = bitcast ptr %arrayidx.i425 to ptr
+ call void @llvm.memset.p0.i64(ptr align 4 %24, i8 0, i64 28, i1 false)
+ %arrayidx.i421 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 110
+ %25 = bitcast ptr %arrayidx.i421 to ptr
+ store <4 x ptr> <ptr @.str.103, ptr @.str.103, ptr @.str.104, ptr @.str.105>, ptr %25
+ %arrayidx.i417 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 114
+ %26 = bitcast ptr %arrayidx.i417 to ptr
+ store <4 x ptr> <ptr @.str.106, ptr @.str.106, ptr @.str.106, ptr @.str.107>, ptr %26
+ %arrayidx.i413 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 118
+ %27 = bitcast ptr %arrayidx.i413 to ptr
+ store <4 x ptr> <ptr @.str.108, ptr @.str.109, ptr @.str.109, ptr @.str.109>, ptr %27
+ %arrayidx.i409 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 122
+ %28 = bitcast ptr %arrayidx.i409 to ptr
+ store <4 x ptr> <ptr @.str.110, ptr @.str.111, ptr @.str.112, ptr @.str.112>, ptr %28
+ %arrayidx.i405 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 126
+ %29 = bitcast ptr %arrayidx.i405 to ptr
+ store <4 x ptr> <ptr @.str.112, ptr @.str.113, ptr @.str.114, ptr @.str.115>, ptr %29
+ %arrayidx.i401 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 130
+ %30 = bitcast ptr %arrayidx.i401 to ptr
+ store <4 x ptr> <ptr @.str.115, ptr @.str.115, ptr @.str.116, ptr @.str.117>, ptr %30
+ %arrayidx.i397 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 134
+ %31 = bitcast ptr %arrayidx.i397 to ptr
+ store <4 x ptr> <ptr @.str.118, ptr @.str.118, ptr @.str.118, ptr @.str.119>, ptr %31
+ %arrayidx.i393 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 138
+ %32 = bitcast ptr %arrayidx.i393 to ptr
+ store <4 x ptr> <ptr @.str.120, ptr @.str.121, ptr @.str.121, ptr @.str.121>, ptr %32
+ %arrayidx.i389 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 142
+ %33 = bitcast ptr %arrayidx.i389 to ptr
+ store <4 x ptr> <ptr @.str.122, ptr @.str.123, ptr @.str.124, ptr @.str.124>, ptr %33
+ %arrayidx.i385 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 146
+ %34 = bitcast ptr %arrayidx.i385 to ptr
+ store <4 x ptr> <ptr @.str.124, ptr @.str.125, ptr @.str.126, ptr @.str.127>, ptr %34
+ %arrayidx.i381 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 150
+ store ptr @.str.127, ptr %arrayidx.i381
+ %arrayidx.i380 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 151
+ store ptr @.str.127, ptr %arrayidx.i380
+ %arrayidx.i379 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 152
+ %arrayidx.i375 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 156
+ %arrayidx.i374 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 157
+ %arrayidx.i373 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 158
+ %arrayidx.i372 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 159
+ %35 = bitcast ptr %arrayidx.i379 to ptr
+ call void @llvm.memset.p0.i64(ptr align 4 %35, i8 0, i64 28, i1 false)
+ store ptr @.str.128, ptr %arrayidx.i372
+ %arrayidx.i371 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 160
+ store ptr @.str.129, ptr %arrayidx.i371
+ %arrayidx.i370 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 161
+ %36 = bitcast ptr %arrayidx.i370 to ptr
+ store <4 x ptr> <ptr @.str.130, ptr @.str.130, ptr @.str.130, ptr @.str.131>, ptr %36
+ %arrayidx.i366 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 165
+ %37 = bitcast ptr %arrayidx.i366 to ptr
+ store <4 x ptr> <ptr @.str.132, ptr @.str.133, ptr @.str.133, ptr @.str.133>, ptr %37
+ %arrayidx.i362 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 169
+ %38 = bitcast ptr %arrayidx.i362 to ptr
+ store <4 x ptr> <ptr @.str.134, ptr @.str.135, ptr @.str.136, ptr @.str.136>, ptr %38
+ %arrayidx.i358 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 173
+ %39 = bitcast ptr %arrayidx.i358 to ptr
+ store <4 x ptr> <ptr @.str.136, ptr @.str.137, ptr @.str.138, ptr @.str.139>, ptr %39
+ %arrayidx.i354 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 177
+ %40 = bitcast ptr %arrayidx.i354 to ptr
+ store <4 x ptr> <ptr @.str.139, ptr @.str.139, ptr @.str.140, ptr @.str.141>, ptr %40
+ %arrayidx.i350 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 181
+ %41 = bitcast ptr %arrayidx.i350 to ptr
+ store <4 x ptr> <ptr @.str.142, ptr @.str.142, ptr @.str.142, ptr @.str.143>, ptr %41
+ %arrayidx.i346 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 185
+ %42 = bitcast ptr %arrayidx.i346 to ptr
+ store <4 x ptr> <ptr @.str.144, ptr @.str.145, ptr @.str.145, ptr @.str.145>, ptr %42
+ %arrayidx.i342 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 189
+ %43 = bitcast ptr %arrayidx.i342 to ptr
+ store <4 x ptr> <ptr @.str.146, ptr @.str.147, ptr @.str.148, ptr @.str.148>, ptr %43
+ %arrayidx.i338 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 193
+ %44 = bitcast ptr %arrayidx.i338 to ptr
+ store <4 x ptr> <ptr @.str.148, ptr @.str.149, ptr @.str.150, ptr @.str.151>, ptr %44
+ %arrayidx.i334 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 197
+ %45 = bitcast ptr %arrayidx.i334 to ptr
+ store <4 x ptr> <ptr @.str.151, ptr @.str.151, ptr @.str.152, ptr @.str.153>, ptr %45
ret void
}
; Function Attrs: argmemonly nounwind
- declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
+ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #1
; Function Attrs: nounwind
- declare void @llvm.stackprotector(i8*, i8**) #2
+ declare void @llvm.stackprotector(ptr, ptr) #2
attributes #0 = { nounwind "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+strict-align,+thumb-mode,+vfp3" }
attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-basic.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-basic.ll
index ad729c2ff2a3a..2e8a05417d439 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-basic.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-basic.ll
@@ -41,10 +41,10 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "armv8r-arm-none-eabi"
; Function Attrs: norecurse nounwind readnone
-define hidden i32 @foo(i32 %a, i32 %b, i32 %c, i32* %d) local_unnamed_addr #0 {
+define hidden i32 @foo(i32 %a, i32 %b, i32 %c, ptr %d) local_unnamed_addr #0 {
entry:
%xor = xor i32 %c, %b
- %ld = load i32, i32* %d
+ %ld = load i32, ptr %d
%add = add nsw i32 %xor, %ld
%div = sdiv i32 %a, %b
%sub = sub i32 %div, %add
diff --git a/llvm/test/CodeGen/ARM/debug-info-blocks.ll b/llvm/test/CodeGen/ARM/debug-info-blocks.ll
index 8ef341faed6b7..2f68abef18ed0 100644
--- a/llvm/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/llvm/test/CodeGen/ARM/debug-info-blocks.ll
@@ -16,89 +16,89 @@ target triple = "thumbv7-apple-ios"
%0 = type opaque
%1 = type { [4 x i32] }
-%2 = type <{ i8*, i32, i32, i8*, %struct.Re*, i8*, %3*, %struct.my_struct* }>
+%2 = type <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr }>
%3 = type opaque
%struct.CP = type { float, float }
%struct.CR = type { %struct.CP, %struct.CP }
%struct.Re = type { i32, i32 }
-%struct.__block_byref_mydata = type { i8*, %struct.__block_byref_mydata*, i32, i32, i8*, i8*, %0* }
+%struct.__block_byref_mydata = type { ptr, ptr, i32, i32, ptr, ptr, ptr }
%struct.my_struct = type opaque
-@"\01L_OBJC_SELECTOR_REFERENCES_13" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_13" = external hidden global ptr, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
@"OBJC_IVAR_$_MyWork._bounds" = external hidden global i32, section "__DATA, __objc_const", align 4
@"OBJC_IVAR_$_MyWork._data" = external hidden global i32, section "__DATA, __objc_const", align 4
-@"\01L_OBJC_SELECTOR_REFERENCES_222" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_222" = external hidden global ptr, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-declare i8* @objc_msgSend(i8*, i8*, ...)
+declare ptr @objc_msgSend(ptr, ptr, ...)
declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
-define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp !dbg !23 {
- %1 = alloca %0*, align 4
+define hidden void @foobar_func_block_invoke_0(ptr %.block_descriptor, ptr %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp !dbg !23 {
+ %1 = alloca ptr, align 4
%bounds = alloca %struct.CR, align 4
%data = alloca %struct.CR, align 4
- call void @llvm.dbg.value(metadata i8* %.block_descriptor, metadata !27, metadata !DIExpression()), !dbg !129
- store %0* %loadedMydata, %0** %1, align 4
- call void @llvm.dbg.declare(metadata %0** %1, metadata !130, metadata !DIExpression()), !dbg !131
- %2 = bitcast %struct.CR* %bounds to %1*
- %3 = getelementptr %1, %1* %2, i32 0, i32 0
- store [4 x i32] %bounds.coerce0, [4 x i32]* %3
- call void @llvm.dbg.declare(metadata %struct.CR* %bounds, metadata !132, metadata !DIExpression()), !dbg !133
- %4 = bitcast %struct.CR* %data to %1*
- %5 = getelementptr %1, %1* %4, i32 0, i32 0
- store [4 x i32] %data.coerce0, [4 x i32]* %5
- call void @llvm.dbg.declare(metadata %struct.CR* %data, metadata !134, metadata !DIExpression()), !dbg !135
- %6 = bitcast i8* %.block_descriptor to %2*
- %7 = getelementptr inbounds %2, %2* %6, i32 0, i32 6
- call void @llvm.dbg.declare(metadata %2* %6, metadata !136, metadata !163), !dbg !137
- call void @llvm.dbg.declare(metadata %2* %6, metadata !138, metadata !164), !dbg !137
- call void @llvm.dbg.declare(metadata %2* %6, metadata !139, metadata !165), !dbg !140
- %8 = load %0*, %0** %1, align 4, !dbg !141
- %9 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141
- %10 = bitcast %0* %8 to i8*, !dbg !141
- %11 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %10, i8* %9), !dbg !141
- %12 = bitcast i8* %11 to %0*, !dbg !141
- %13 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !141
- %14 = load i8*, i8** %13, !dbg !141
- %15 = bitcast i8* %14 to %struct.__block_byref_mydata*, !dbg !141
- %16 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %15, i32 0, i32 1, !dbg !141
- %17 = load %struct.__block_byref_mydata*, %struct.__block_byref_mydata** %16, !dbg !141
- %18 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %17, i32 0, i32 6, !dbg !141
- store %0* %12, %0** %18, align 4, !dbg !141
- %19 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !143
- %20 = load %3*, %3** %19, align 4, !dbg !143
- %21 = load i32, i32* @"OBJC_IVAR_$_MyWork._data", !dbg !143
- %22 = bitcast %3* %20 to i8*, !dbg !143
- %23 = getelementptr inbounds i8, i8* %22, i32 %21, !dbg !143
- %24 = bitcast i8* %23 to %struct.CR*, !dbg !143
- %25 = bitcast %struct.CR* %24 to i8*, !dbg !143
- %26 = bitcast %struct.CR* %data to i8*, !dbg !143
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 16, i1 false), !dbg !143
- %27 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !144
- %28 = load %3*, %3** %27, align 4, !dbg !144
- %29 = load i32, i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144
- %30 = bitcast %3* %28 to i8*, !dbg !144
- %31 = getelementptr inbounds i8, i8* %30, i32 %29, !dbg !144
- %32 = bitcast i8* %31 to %struct.CR*, !dbg !144
- %33 = bitcast %struct.CR* %32 to i8*, !dbg !144
- %34 = bitcast %struct.CR* %bounds to i8*, !dbg !144
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %33, i8* align 4 %34, i32 16, i1 false), !dbg !144
- %35 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !145
- %36 = load %3*, %3** %35, align 4, !dbg !145
- %37 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !145
- %38 = load i8*, i8** %37, !dbg !145
- %39 = bitcast i8* %38 to %struct.__block_byref_mydata*, !dbg !145
- %40 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %39, i32 0, i32 1, !dbg !145
- %41 = load %struct.__block_byref_mydata*, %struct.__block_byref_mydata** %40, !dbg !145
- %42 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %41, i32 0, i32 6, !dbg !145
- %43 = load %0*, %0** %42, align 4, !dbg !145
- %44 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145
- %45 = bitcast %3* %36 to i8*, !dbg !145
- call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*)*)(i8* %45, i8* %44, %0* %43), !dbg !145
+ call void @llvm.dbg.value(metadata ptr %.block_descriptor, metadata !27, metadata !DIExpression()), !dbg !129
+ store ptr %loadedMydata, ptr %1, align 4
+ call void @llvm.dbg.declare(metadata ptr %1, metadata !130, metadata !DIExpression()), !dbg !131
+ %2 = bitcast ptr %bounds to ptr
+ %3 = getelementptr %1, ptr %2, i32 0, i32 0
+ store [4 x i32] %bounds.coerce0, ptr %3
+ call void @llvm.dbg.declare(metadata ptr %bounds, metadata !132, metadata !DIExpression()), !dbg !133
+ %4 = bitcast ptr %data to ptr
+ %5 = getelementptr %1, ptr %4, i32 0, i32 0
+ store [4 x i32] %data.coerce0, ptr %5
+ call void @llvm.dbg.declare(metadata ptr %data, metadata !134, metadata !DIExpression()), !dbg !135
+ %6 = bitcast ptr %.block_descriptor to ptr
+ %7 = getelementptr inbounds %2, ptr %6, i32 0, i32 6
+ call void @llvm.dbg.declare(metadata ptr %6, metadata !136, metadata !163), !dbg !137
+ call void @llvm.dbg.declare(metadata ptr %6, metadata !138, metadata !164), !dbg !137
+ call void @llvm.dbg.declare(metadata ptr %6, metadata !139, metadata !165), !dbg !140
+ %8 = load ptr, ptr %1, align 4, !dbg !141
+ %9 = load ptr, ptr @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141
+ %10 = bitcast ptr %8 to ptr, !dbg !141
+ %11 = call ptr @objc_msgSend(ptr %10, ptr %9), !dbg !141
+ %12 = bitcast ptr %11 to ptr, !dbg !141
+ %13 = getelementptr inbounds %2, ptr %6, i32 0, i32 5, !dbg !141
+ %14 = load ptr, ptr %13, !dbg !141
+ %15 = bitcast ptr %14 to ptr, !dbg !141
+ %16 = getelementptr inbounds %struct.__block_byref_mydata, ptr %15, i32 0, i32 1, !dbg !141
+ %17 = load ptr, ptr %16, !dbg !141
+ %18 = getelementptr inbounds %struct.__block_byref_mydata, ptr %17, i32 0, i32 6, !dbg !141
+ store ptr %12, ptr %18, align 4, !dbg !141
+ %19 = getelementptr inbounds %2, ptr %6, i32 0, i32 6, !dbg !143
+ %20 = load ptr, ptr %19, align 4, !dbg !143
+ %21 = load i32, ptr @"OBJC_IVAR_$_MyWork._data", !dbg !143
+ %22 = bitcast ptr %20 to ptr, !dbg !143
+ %23 = getelementptr inbounds i8, ptr %22, i32 %21, !dbg !143
+ %24 = bitcast ptr %23 to ptr, !dbg !143
+ %25 = bitcast ptr %24 to ptr, !dbg !143
+ %26 = bitcast ptr %data to ptr, !dbg !143
+ call void @llvm.memcpy.p0.p0.i32(ptr align 4 %25, ptr align 4 %26, i32 16, i1 false), !dbg !143
+ %27 = getelementptr inbounds %2, ptr %6, i32 0, i32 6, !dbg !144
+ %28 = load ptr, ptr %27, align 4, !dbg !144
+ %29 = load i32, ptr @"OBJC_IVAR_$_MyWork._bounds", !dbg !144
+ %30 = bitcast ptr %28 to ptr, !dbg !144
+ %31 = getelementptr inbounds i8, ptr %30, i32 %29, !dbg !144
+ %32 = bitcast ptr %31 to ptr, !dbg !144
+ %33 = bitcast ptr %32 to ptr, !dbg !144
+ %34 = bitcast ptr %bounds to ptr, !dbg !144
+ call void @llvm.memcpy.p0.p0.i32(ptr align 4 %33, ptr align 4 %34, i32 16, i1 false), !dbg !144
+ %35 = getelementptr inbounds %2, ptr %6, i32 0, i32 6, !dbg !145
+ %36 = load ptr, ptr %35, align 4, !dbg !145
+ %37 = getelementptr inbounds %2, ptr %6, i32 0, i32 5, !dbg !145
+ %38 = load ptr, ptr %37, !dbg !145
+ %39 = bitcast ptr %38 to ptr, !dbg !145
+ %40 = getelementptr inbounds %struct.__block_byref_mydata, ptr %39, i32 0, i32 1, !dbg !145
+ %41 = load ptr, ptr %40, !dbg !145
+ %42 = getelementptr inbounds %struct.__block_byref_mydata, ptr %41, i32 0, i32 6, !dbg !145
+ %43 = load ptr, ptr %42, align 4, !dbg !145
+ %44 = load ptr, ptr @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145
+ %45 = bitcast ptr %36 to ptr, !dbg !145
+ call void @objc_msgSend(ptr %45, ptr %44, ptr %43), !dbg !145
ret void, !dbg !146
}
diff --git a/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll b/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll
index bba2c390ea914..f5e7ffed8bbe9 100644
--- a/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -10,51 +10,51 @@ target triple = "thumbv7-apple-darwin10"
@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00", align 4
@.str1 = private unnamed_addr constant [6 x i8] c"point\00", align 4
-define i32 @inlineprinter(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize !dbg !9 {
+define i32 @inlineprinter(ptr %ptr, double %val, i8 zeroext %c) nounwind optsize !dbg !9 {
entry:
- tail call void @llvm.dbg.value(metadata i8* %ptr, metadata !19, metadata !DIExpression()), !dbg !26
+ tail call void @llvm.dbg.value(metadata ptr %ptr, metadata !19, metadata !DIExpression()), !dbg !26
tail call void @llvm.dbg.value(metadata double %val, metadata !20, metadata !DIExpression()), !dbg !26
tail call void @llvm.dbg.value(metadata i8 %c, metadata !21, metadata !DIExpression()), !dbg !26
%0 = zext i8 %c to i32, !dbg !27
- %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !27
+ %1 = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %ptr, double %val, i32 %0) nounwind, !dbg !27
ret i32 0, !dbg !29
}
-define i32 @printer(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize noinline !dbg !0 {
+define i32 @printer(ptr %ptr, double %val, i8 zeroext %c) nounwind optsize noinline !dbg !0 {
entry:
- tail call void @llvm.dbg.value(metadata i8* %ptr, metadata !16, metadata !DIExpression()), !dbg !30
+ tail call void @llvm.dbg.value(metadata ptr %ptr, metadata !16, metadata !DIExpression()), !dbg !30
tail call void @llvm.dbg.value(metadata double %val, metadata !17, metadata !DIExpression()), !dbg !30
tail call void @llvm.dbg.value(metadata i8 %c, metadata !18, metadata !DIExpression()), !dbg !30
%0 = zext i8 %c to i32, !dbg !31
- %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !31
+ %1 = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %ptr, double %val, i32 %0) nounwind, !dbg !31
ret i32 0, !dbg !33
}
-declare i32 @printf(i8* nocapture, ...) nounwind
+declare i32 @printf(ptr nocapture, ...) nounwind
declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize !dbg !10 {
+define i32 @main(i32 %argc, ptr nocapture %argv) nounwind optsize !dbg !10 {
entry:
tail call void @llvm.dbg.value(metadata i32 %argc, metadata !22, metadata !DIExpression()), !dbg !34
- tail call void @llvm.dbg.value(metadata i8** %argv, metadata !23, metadata !DIExpression()), !dbg !34
+ tail call void @llvm.dbg.value(metadata ptr %argv, metadata !23, metadata !DIExpression()), !dbg !34
%0 = sitofp i32 %argc to double, !dbg !35
%1 = fadd double %0, 5.555552e+05, !dbg !35
tail call void @llvm.dbg.value(metadata double %1, metadata !24, metadata !DIExpression()), !dbg !35
- %2 = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str1, i32 0, i32 0)) nounwind, !dbg !36
- %3 = getelementptr inbounds i8, i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !37
+ %2 = tail call i32 @puts(ptr @.str1) nounwind, !dbg !36
+ %3 = getelementptr inbounds i8, ptr @main, i32 %argc, !dbg !37
%4 = trunc i32 %argc to i8, !dbg !37
%5 = add i8 %4, 97, !dbg !37
- tail call void @llvm.dbg.value(metadata i8* %3, metadata !49, metadata !DIExpression()) nounwind, !dbg !38
+ tail call void @llvm.dbg.value(metadata ptr %3, metadata !49, metadata !DIExpression()) nounwind, !dbg !38
tail call void @llvm.dbg.value(metadata double %1, metadata !50, metadata !DIExpression()) nounwind, !dbg !38
tail call void @llvm.dbg.value(metadata i8 %5, metadata !51, metadata !DIExpression()) nounwind, !dbg !38
%6 = zext i8 %5 to i32, !dbg !39
- %7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %3, double %1, i32 %6) nounwind, !dbg !39
- %8 = tail call i32 @printer(i8* %3, double %1, i8 zeroext %5) nounwind, !dbg !40
+ %7 = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %3, double %1, i32 %6) nounwind, !dbg !39
+ %8 = tail call i32 @printer(ptr %3, double %1, i8 zeroext %5) nounwind, !dbg !40
ret i32 0, !dbg !41
}
-declare i32 @puts(i8* nocapture) nounwind
+declare i32 @puts(ptr nocapture) nounwind
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!48}
diff --git a/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll b/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll
index c260b2ef6749b..2f256141f4c6f 100644
--- a/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -10,53 +10,53 @@ target triple = "thumbv7-apple-macosx10.6.7"
@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00"
@.str1 = private unnamed_addr constant [6 x i8] c"point\00"
-define i32 @inlineprinter(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize ssp !dbg !0 {
+define i32 @inlineprinter(ptr %ptr, float %val, i8 zeroext %c) nounwind optsize ssp !dbg !0 {
entry:
- tail call void @llvm.dbg.value(metadata i8* %ptr, metadata !8, metadata !DIExpression()), !dbg !24
+ tail call void @llvm.dbg.value(metadata ptr %ptr, metadata !8, metadata !DIExpression()), !dbg !24
tail call void @llvm.dbg.value(metadata float %val, metadata !10, metadata !DIExpression()), !dbg !25
tail call void @llvm.dbg.value(metadata i8 %c, metadata !12, metadata !DIExpression()), !dbg !26
%conv = fpext float %val to double, !dbg !27
%conv3 = zext i8 %c to i32, !dbg !27
- %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27
+ %call = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27
ret i32 0, !dbg !29
}
-declare i32 @printf(i8* nocapture, ...) nounwind optsize
+declare i32 @printf(ptr nocapture, ...) nounwind optsize
-define i32 @printer(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp !dbg !6 {
+define i32 @printer(ptr %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp !dbg !6 {
entry:
- tail call void @llvm.dbg.value(metadata i8* %ptr, metadata !14, metadata !DIExpression()), !dbg !30
+ tail call void @llvm.dbg.value(metadata ptr %ptr, metadata !14, metadata !DIExpression()), !dbg !30
tail call void @llvm.dbg.value(metadata float %val, metadata !15, metadata !DIExpression()), !dbg !31
tail call void @llvm.dbg.value(metadata i8 %c, metadata !16, metadata !DIExpression()), !dbg !32
%conv = fpext float %val to double, !dbg !33
%conv3 = zext i8 %c to i32, !dbg !33
- %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33
+ %call = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33
ret i32 0, !dbg !35
}
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize ssp !dbg !7 {
+define i32 @main(i32 %argc, ptr nocapture %argv) nounwind optsize ssp !dbg !7 {
entry:
tail call void @llvm.dbg.value(metadata i32 %argc, metadata !17, metadata !DIExpression()), !dbg !36
- tail call void @llvm.dbg.value(metadata i8** %argv, metadata !18, metadata !DIExpression()), !dbg !37
+ tail call void @llvm.dbg.value(metadata ptr %argv, metadata !18, metadata !DIExpression()), !dbg !37
%conv = sitofp i32 %argc to double, !dbg !38
%add = fadd double %conv, 5.555552e+05, !dbg !38
%conv1 = fptrunc double %add to float, !dbg !38
tail call void @llvm.dbg.value(metadata float %conv1, metadata !22, metadata !DIExpression()), !dbg !38
- %call = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str1, i32 0, i32 0)) nounwind optsize, !dbg !39
- %add.ptr = getelementptr i8, i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !40
+ %call = tail call i32 @puts(ptr @.str1) nounwind optsize, !dbg !39
+ %add.ptr = getelementptr i8, ptr @main, i32 %argc, !dbg !40
%add5 = add nsw i32 %argc, 97, !dbg !40
%conv6 = trunc i32 %add5 to i8, !dbg !40
- tail call void @llvm.dbg.value(metadata i8* %add.ptr, metadata !58, metadata !DIExpression()) nounwind, !dbg !41
+ tail call void @llvm.dbg.value(metadata ptr %add.ptr, metadata !58, metadata !DIExpression()) nounwind, !dbg !41
tail call void @llvm.dbg.value(metadata float %conv1, metadata !60, metadata !DIExpression()) nounwind, !dbg !42
tail call void @llvm.dbg.value(metadata i8 %conv6, metadata !62, metadata !DIExpression()) nounwind, !dbg !43
%conv.i = fpext float %conv1 to double, !dbg !44
%conv3.i = and i32 %add5, 255, !dbg !44
- %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, !dbg !44
- %call14 = tail call i32 @printer(i8* %add.ptr, float %conv1, i8 zeroext %conv6) optsize, !dbg !45
+ %call.i = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, !dbg !44
+ %call14 = tail call i32 @printer(ptr %add.ptr, float %conv1, i8 zeroext %conv6) optsize, !dbg !45
ret i32 0, !dbg !46
}
-declare i32 @puts(i8* nocapture) nounwind optsize
+declare i32 @puts(ptr nocapture) nounwind optsize
declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone
diff --git a/llvm/test/CodeGen/ARM/dwarf-eh.ll b/llvm/test/CodeGen/ARM/dwarf-eh.ll
index 34a20328fb025..75cf0861a3e46 100644
--- a/llvm/test/CodeGen/ARM/dwarf-eh.ll
+++ b/llvm/test/CodeGen/ARM/dwarf-eh.ll
@@ -9,55 +9,55 @@ target triple = "armv5e--netbsd-eabi"
%struct.exception = type { i8 }
-@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTVN10__cxxabiv117__class_type_infoE = external global ptr
@_ZTS9exception = linkonce_odr constant [11 x i8] c"9exception\00"
-@_ZTI9exception = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @_ZTS9exception, i32 0, i32 0) }
+@_ZTI9exception = linkonce_odr unnamed_addr constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 2), ptr @_ZTS9exception }
-define void @f() uwtable personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
- %1 = alloca i8*
+define void @f() uwtable personality ptr @__gxx_personality_v0 {
+ %1 = alloca ptr
%2 = alloca i32
- %e = alloca %struct.exception*, align 4
+ %e = alloca ptr, align 4
invoke void @g()
to label %3 unwind label %4
br label %16
- %5 = landingpad { i8*, i32 }
- catch i8* bitcast ({ i8*, i8* }* @_ZTI9exception to i8*)
- %6 = extractvalue { i8*, i32 } %5, 0
- store i8* %6, i8** %1
- %7 = extractvalue { i8*, i32 } %5, 1
- store i32 %7, i32* %2
+ %5 = landingpad { ptr, i32 }
+ catch ptr @_ZTI9exception
+ %6 = extractvalue { ptr, i32 } %5, 0
+ store ptr %6, ptr %1
+ %7 = extractvalue { ptr, i32 } %5, 1
+ store i32 %7, ptr %2
br label %8
- %9 = load i32, i32* %2
- %10 = call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI9exception to i8*)) nounwind
+ %9 = load i32, ptr %2
+ %10 = call i32 @llvm.eh.typeid.for(ptr @_ZTI9exception) nounwind
%11 = icmp eq i32 %9, %10
br i1 %11, label %12, label %17
- %13 = load i8*, i8** %1
- %14 = call i8* @__cxa_begin_catch(i8* %13) #3
- %15 = bitcast i8* %14 to %struct.exception*
- store %struct.exception* %15, %struct.exception** %e
+ %13 = load ptr, ptr %1
+ %14 = call ptr @__cxa_begin_catch(ptr %13) #3
+ %15 = bitcast ptr %14 to ptr
+ store ptr %15, ptr %e
call void @__cxa_end_catch()
br label %16
ret void
- %18 = load i8*, i8** %1
- %19 = load i32, i32* %2
- %20 = insertvalue { i8*, i32 } undef, i8* %18, 0
- %21 = insertvalue { i8*, i32 } %20, i32 %19, 1
- resume { i8*, i32 } %21
+ %18 = load ptr, ptr %1
+ %19 = load i32, ptr %2
+ %20 = insertvalue { ptr, i32 } undef, ptr %18, 0
+ %21 = insertvalue { ptr, i32 } %20, i32 %19, 1
+ resume { ptr, i32 } %21
}
declare void @g()
declare i32 @__gxx_personality_v0(...)
-declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+declare i32 @llvm.eh.typeid.for(ptr) nounwind readnone
-declare i8* @__cxa_begin_catch(i8*)
+declare ptr @__cxa_begin_catch(ptr)
declare void @__cxa_end_catch()
diff --git a/llvm/test/CodeGen/ARM/ldrcppic.ll b/llvm/test/CodeGen/ARM/ldrcppic.ll
index 0a4c8abcad6c4..5ec70b508ffcf 100644
--- a/llvm/test/CodeGen/ARM/ldrcppic.ll
+++ b/llvm/test/CodeGen/ARM/ldrcppic.ll
@@ -23,7 +23,7 @@ define dso_local fastcc void @_ZN15UsecaseSelector26IsAllowedImplDefinedFormatE1
br i1 undef, label %4, label %13
; <label>:4: ; preds = %3
- %5 = getelementptr inbounds [16 x i32], ptr bitcast (<{ i32, i32, i32, i32, [12 x i32] }>* @_ZN15UsecaseSelector25AllowedImplDefinedFormatsE to ptr), i32 0, i32 undef
+ %5 = getelementptr inbounds [16 x i32], ptr @_ZN15UsecaseSelector25AllowedImplDefinedFormatsE, i32 0, i32 undef
%6 = load i32, ptr %5, align 4
%7 = icmp eq i32 10, %6
br i1 %7, label %9, label %8
diff --git a/llvm/test/CodeGen/ARM/misched-copy-arm.ll b/llvm/test/CodeGen/ARM/misched-copy-arm.ll
index dbed4650c3923..d830a2ced6316 100644
--- a/llvm/test/CodeGen/ARM/misched-copy-arm.ll
+++ b/llvm/test/CodeGen/ARM/misched-copy-arm.ll
@@ -9,7 +9,7 @@
; CHECK: t2ADDrr
; CHECK: t2CMPrr
; CHECK: COPY
-define i32 @postinc(i32 %a, i32* nocapture %d, i32 %s) nounwind {
+define i32 @postinc(i32 %a, ptr nocapture %d, i32 %s) nounwind {
entry:
%cmp4 = icmp eq i32 %a, 0
br i1 %cmp4, label %for.end, label %for.body
@@ -18,8 +18,8 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%s.05 = phi i32 [ %mul, %for.body ], [ 0, %entry ]
%indvars.iv.next = add i32 %indvars.iv, %s
- %arrayidx = getelementptr inbounds i32, i32* %d, i32 %indvars.iv
- %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %d, i32 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
%mul = mul nsw i32 %0, %s.05
%exitcond = icmp eq i32 %indvars.iv.next, %a
br i1 %exitcond, label %for.end, label %for.body
@@ -41,16 +41,16 @@ for.end: ; preds = %for.body, %entry
%union.rtunion_def = type { i64 }
; Function Attrs: nounwind ssp
-declare hidden fastcc void @df_ref_record(i32* nocapture, %struct.rtx_def*, %struct.rtx_def**, %struct.rtx_def*, i32, i32) #0
+declare hidden fastcc void @df_ref_record(ptr nocapture, ptr, ptr, ptr, i32, i32) #0
; Function Attrs: nounwind ssp
-define hidden fastcc void @df_def_record_1(i32* nocapture %df, %struct.rtx_def* %x, %struct.rtx_def* %insn) #0 {
+define hidden fastcc void @df_def_record_1(ptr nocapture %df, ptr %x, ptr %insn) #0 {
entry:
br label %while.cond
while.cond: ; preds = %if.end28, %entry
- %loc.0 = phi %struct.rtx_def** [ %rtx31, %if.end28 ], [ undef, %entry ]
- %dst.0 = phi %struct.rtx_def* [ %0, %if.end28 ], [ undef, %entry ]
+ %loc.0 = phi ptr [ %rtx31, %if.end28 ], [ undef, %entry ]
+ %dst.0 = phi ptr [ %0, %if.end28 ], [ undef, %entry ]
switch i32 undef, label %if.end47 [
i32 61, label %if.then46
i32 64, label %if.then24
@@ -62,14 +62,14 @@ if.then24: ; preds = %while.cond
br label %if.end28
if.end28: ; preds = %if.then24, %while.cond, %while.cond
- %dst.1 = phi %struct.rtx_def* [ undef, %if.then24 ], [ %dst.0, %while.cond ], [ %dst.0, %while.cond ]
- %arrayidx30 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %dst.1, i32 0, i32 1, i32 0
- %rtx31 = bitcast %union.rtunion_def* %arrayidx30 to %struct.rtx_def**
- %0 = load %struct.rtx_def*, %struct.rtx_def** %rtx31, align 4
+ %dst.1 = phi ptr [ undef, %if.then24 ], [ %dst.0, %while.cond ], [ %dst.0, %while.cond ]
+ %arrayidx30 = getelementptr inbounds %struct.rtx_def, ptr %dst.1, i32 0, i32 1, i32 0
+ %rtx31 = bitcast ptr %arrayidx30 to ptr
+ %0 = load ptr, ptr %rtx31, align 4
br label %while.cond
if.then46: ; preds = %while.cond
- tail call fastcc void @df_ref_record(i32* %df, %struct.rtx_def* %dst.0, %struct.rtx_def** %loc.0, %struct.rtx_def* %insn, i32 0, i32 undef)
+ tail call fastcc void @df_ref_record(ptr %df, ptr %dst.0, ptr %loc.0, ptr %insn, i32 0, i32 undef)
unreachable
if.end47: ; preds = %while.cond
diff --git a/llvm/test/CodeGen/ARM/no-register-coalescing-in-returnsTwice.mir b/llvm/test/CodeGen/ARM/no-register-coalescing-in-returnsTwice.mir
index 9285bbc15f023..5c59566247d89 100644
--- a/llvm/test/CodeGen/ARM/no-register-coalescing-in-returnsTwice.mir
+++ b/llvm/test/CodeGen/ARM/no-register-coalescing-in-returnsTwice.mir
@@ -33,55 +33,55 @@
define i32 @main() {
entry:
%P0 = alloca %struct.S37, align 8
- %0 = bitcast %struct.S37* %P0 to %struct.S18*
+ %0 = bitcast ptr %P0 to ptr
%jb1 = alloca [20 x i64], align 8
%P1 = alloca %struct.S18, align 8
%jb2 = alloca [20 x i64], align 8
- %1 = bitcast %struct.S37* %P0 to i8*
- %M2.i = getelementptr inbounds %struct.S37, %struct.S37* %P0, i32 0, i32 2
- %2 = bitcast %struct.S38* %M2.i to i8*
- call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %2, i8 0, i64 48, i1 false)
- %M6.i = getelementptr inbounds %struct.S37, %struct.S37* %P0, i32 0, i32 7
- store i32 0, i32* %M6.i, align 8
- %3 = bitcast [20 x i64]* %jb1 to i8*
- %arraydecay1 = bitcast [20 x i64]* %jb1 to i64*
- %call1 = call i32 @setjmp(i64* nonnull %arraydecay1)
+ %1 = bitcast ptr %P0 to ptr
+ %M2.i = getelementptr inbounds %struct.S37, ptr %P0, i32 0, i32 2
+ %2 = bitcast ptr %M2.i to ptr
+ call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(48) %2, i8 0, i64 48, i1 false)
+ %M6.i = getelementptr inbounds %struct.S37, ptr %P0, i32 0, i32 7
+ store i32 0, ptr %M6.i, align 8
+ %3 = bitcast ptr %jb1 to ptr
+ %arraydecay1 = bitcast ptr %jb1 to ptr
+ %call1 = call i32 @setjmp(ptr nonnull %arraydecay1)
%tobool = icmp eq i32 %call1, 0
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %4 = bitcast [20 x i64]* %jb1 to i64*
- call void (i64*, %struct.S37*, ...) @_Z3barPx3S37z(i64* nonnull %4, %struct.S37* nonnull byval(%struct.S37) align 8 %P0)
+ %4 = bitcast ptr %jb1 to ptr
+ call void (ptr, ptr, ...) @_Z3barPx3S37z(ptr nonnull %4, ptr nonnull byval(%struct.S37) align 8 %P0)
unreachable
if.end: ; preds = %entry
- %5 = bitcast [20 x i64]* %jb1 to i8*
- %6 = bitcast %struct.S37* %P0 to i8*
+ %5 = bitcast ptr %jb1 to ptr
+ %6 = bitcast ptr %P0 to ptr
call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{sp},~{lr}"()
- %7 = bitcast %struct.S18* %0 to i8*
- %BM0.i = getelementptr inbounds %struct.S18, %struct.S18* %0, i32 0, i32 2
- store double 0.000000e+00, double* %BM0.i, align 8
- %M0.i = getelementptr inbounds %struct.S18, %struct.S18* %0, i32 0, i32 5
- store i32 42, i32* %M0.i, align 8
- %M3.i = getelementptr inbounds %struct.S18, %struct.S18* %0, i32 0, i32 7
- %8 = bitcast %struct.S23* %M3.i to i8*
- call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(28) %8, i8 0, i64 28, i1 false)
- %9 = bitcast [20 x i64]* %jb1 to i8*
- %arraydecay42 = bitcast [20 x i64]* %jb1 to i64*
- %call5 = call i32 @setjmp(i64* nonnull %arraydecay42)
+ %7 = bitcast ptr %0 to ptr
+ %BM0.i = getelementptr inbounds %struct.S18, ptr %0, i32 0, i32 2
+ store double 0.000000e+00, ptr %BM0.i, align 8
+ %M0.i = getelementptr inbounds %struct.S18, ptr %0, i32 0, i32 5
+ store i32 42, ptr %M0.i, align 8
+ %M3.i = getelementptr inbounds %struct.S18, ptr %0, i32 0, i32 7
+ %8 = bitcast ptr %M3.i to ptr
+ call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(28) %8, i8 0, i64 28, i1 false)
+ %9 = bitcast ptr %jb1 to ptr
+ %arraydecay42 = bitcast ptr %jb1 to ptr
+ %call5 = call i32 @setjmp(ptr nonnull %arraydecay42)
%tobool6 = icmp eq i32 %call5, 0
br i1 %tobool6, label %if.then7, label %if.end10
if.then7: ; preds = %if.end
- %10 = bitcast [20 x i64]* %jb1 to i64*
- call void (i64*, %struct.S18*, ...) @_Z3fooPx3S18z(i64* nonnull %10, %struct.S18* nonnull byval(%struct.S18) align 8 %0)
+ %10 = bitcast ptr %jb1 to ptr
+ call void (ptr, ptr, ...) @_Z3fooPx3S18z(ptr nonnull %10, ptr nonnull byval(%struct.S18) align 8 %0)
unreachable
if.end10: ; preds = %if.end
- %11 = bitcast [20 x i64]* %jb1 to i8*
- %12 = bitcast %struct.S18* %0 to i8*
+ %11 = bitcast ptr %jb1 to ptr
+ %12 = bitcast ptr %0 to ptr
ret i32 0
}
- declare i32 @setjmp(i64*)
- declare void @_Z3barPx3S37z(i64*, %struct.S37* byval(%struct.S37) align 8, ...)
- declare void @_Z3fooPx3S18z(i64*, %struct.S18* byval(%struct.S18) align 8, ...)
- declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+ declare i32 @setjmp(ptr)
+ declare void @_Z3barPx3S37z(ptr, ptr byval(%struct.S37) align 8, ...)
+ declare void @_Z3fooPx3S18z(ptr, ptr byval(%struct.S18) align 8, ...)
+ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
...
---
name: main
diff --git a/llvm/test/CodeGen/ARM/readonly-aliases.ll b/llvm/test/CodeGen/ARM/readonly-aliases.ll
index b12c5ead73cb5..11014e9f10838 100644
--- a/llvm/test/CodeGen/ARM/readonly-aliases.ll
+++ b/llvm/test/CodeGen/ARM/readonly-aliases.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple thumbv7-unknown-linux-android -filetype asm -o - %s | FileCheck %s
@a = protected constant <{ i32, i32 }> <{ i32 0, i32 0 }>
-@b = protected alias i32, getelementptr(i32, ptr getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @a, i32 0, i32 1), i32 -1)
+@b = protected alias i32, getelementptr(i32, ptr getelementptr inbounds (<{ i32, i32 }>, ptr @a, i32 0, i32 1), i32 -1)
declare void @f(ptr)
diff --git a/llvm/test/CodeGen/ARM/tail-dup-kill-flags.ll b/llvm/test/CodeGen/ARM/tail-dup-kill-flags.ll
index bce6cdc4e28db..2ad9ded8fb55b 100644
--- a/llvm/test/CodeGen/ARM/tail-dup-kill-flags.ll
+++ b/llvm/test/CodeGen/ARM/tail-dup-kill-flags.ll
@@ -8,19 +8,19 @@ target triple = "thumbv7-apple-ios8.0.0"
; CHECK-LABEL: @test
-%struct.cdiff_ctx = type { i8*, %struct.cdiff_node*, %struct.cdiff_node*, %struct.cdiff_node*, %struct.cdiff_node* }
-%struct.cdiff_node = type { i32, i8*, i8*, %struct.cdiff_node* }
+%struct.cdiff_ctx = type { ptr, ptr, ptr, ptr, ptr }
+%struct.cdiff_node = type { i32, ptr, ptr, ptr }
declare i32 @logg(i32)
-define hidden i32 @test(%struct.cdiff_ctx* nocapture %ctx, %struct.cdiff_node* %tmp10) {
+define hidden i32 @test(ptr nocapture %ctx, ptr %tmp10) {
bb:
br label %.outer
bb33: ; preds = %bb92, %.outer
%lines.0 = phi i32 [ %tmp37, %bb92 ], [ %lines.0.ph, %.outer ]
%tmp37 = add i32 %lines.0, 1
- %tmp39 = load i32, i32* %tmp57, align 4
+ %tmp39 = load i32, ptr %tmp57, align 4
%tmp40 = icmp eq i32 %tmp39, %tmp37
br i1 %tmp40, label %bb41, label %bb92
@@ -34,14 +34,14 @@ bb47: ; preds = %bb41
ret i32 -1
bb53: ; preds = %bb41
- %tmp54 = getelementptr inbounds %struct.cdiff_node, %struct.cdiff_node* %del.0.ph, i32 0, i32 3
- %tmp55 = load %struct.cdiff_node*, %struct.cdiff_node** %tmp54, align 4
+ %tmp54 = getelementptr inbounds %struct.cdiff_node, ptr %del.0.ph, i32 0, i32 3
+ %tmp55 = load ptr, ptr %tmp54, align 4
br label %.outer
.outer: ; preds = %bb53, %bb
- %del.0.ph = phi %struct.cdiff_node* [ %tmp55, %bb53 ], [ null, %bb ]
+ %del.0.ph = phi ptr [ %tmp55, %bb53 ], [ null, %bb ]
%lines.0.ph = phi i32 [ 1, %bb53 ], [ 0, %bb ]
- %tmp57 = getelementptr inbounds %struct.cdiff_node, %struct.cdiff_node* %del.0.ph, i32 0, i32 0
+ %tmp57 = getelementptr inbounds %struct.cdiff_node, ptr %del.0.ph, i32 0, i32 0
br label %bb33
bb92: ; preds = %bb33
diff --git a/llvm/test/CodeGen/Thumb/PR36658.mir b/llvm/test/CodeGen/Thumb/PR36658.mir
index 26721d68cc937..e2986638fcc64 100644
--- a/llvm/test/CodeGen/Thumb/PR36658.mir
+++ b/llvm/test/CodeGen/Thumb/PR36658.mir
@@ -26,12 +26,12 @@
declare i32 @foo2(i32)
- declare i32 @foo3(i32*)
+ declare i32 @foo3(ptr)
; Function Attrs: nounwind optsize
- define internal fastcc i32 @foo4(i32* nocapture %ignore_ptr) #0 {
+ define internal fastcc i32 @foo4(ptr nocapture %ignore_ptr) #0 {
entry:
- %call = tail call i32 @foo3(i32* undef)
+ %call = tail call i32 @foo3(ptr undef)
switch i32 %call, label %sw.epilog [
i32 120, label %sw.bb
i32 48, label %sw.bb73
@@ -62,7 +62,7 @@
br label %while.cond
while.cond: ; preds = %while.cond, %sw.bb
- %call5 = tail call i32 @foo3(i32* null)
+ %call5 = tail call i32 @foo3(ptr null)
br label %while.cond
sw.bb73: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
@@ -74,7 +74,7 @@
br label %while.body83
while.body83: ; preds = %while.body83.preheader, %while.body83
- %call87 = tail call i32 @foo3(i32* null)
+ %call87 = tail call i32 @foo3(ptr null)
br label %while.body83
while.end88: ; preds = %sw.bb73
@@ -82,7 +82,7 @@
unreachable
sw.bb91: ; preds = %entry
- store i32 1, i32* %ignore_ptr, align 4
+ store i32 1, ptr %ignore_ptr, align 4
br label %cleanup
sw.bb93: ; preds = %entry
@@ -110,7 +110,7 @@
br i1 undef, label %cleanup, label %if.then109
if.then109: ; preds = %sw.bb107
- %call110 = tail call i32 bitcast (i32 (...)* @foo1 to i32 (i8*, i32)*)(i8* undef, i32 %call)
+ %call110 = tail call i32 @foo1(ptr undef, i32 %call)
unreachable
sw.epilog: ; preds = %entry
@@ -123,7 +123,7 @@
}
; Function Attrs: nounwind
- declare void @llvm.stackprotector(i8*, i8**) #1
+ declare void @llvm.stackprotector(ptr, ptr) #1
attributes #0 = { nounwind optsize }
attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/Thumb/branch-to-return.ll b/llvm/test/CodeGen/Thumb/branch-to-return.ll
index 5bfccc0637550..11e8add6f215b 100644
--- a/llvm/test/CodeGen/Thumb/branch-to-return.ll
+++ b/llvm/test/CodeGen/Thumb/branch-to-return.ll
@@ -3,7 +3,7 @@
; Test the branch to return in BB4 is converted to return.
-define i32 @foo(i32* %x, i32 %n) {
+define i32 @foo(ptr %x, i32 %n) {
; CHECK-LABEL: foo:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r1, #1
@@ -47,7 +47,7 @@ for.body.preheader: ; preds = %entry
br i1 %min.iters.check, label %for.body.preheader1, label %middle.block
middle.block:
- %x3 = load i32, i32* %x, align 4
+ %x3 = load i32, ptr %x, align 4
%cmp.n = icmp eq i32 %n.vec, %n
br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
@@ -58,8 +58,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %n.vec, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %v5 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %v5 = load i32, ptr %arrayidx, align 4
%add = add nsw i32 %v5, %r.07
%inc = add nuw nsw i32 %i.08, 1
%exitcond = icmp eq i32 %inc, %n
diff --git a/llvm/test/CodeGen/Thumb/tbb-reuse.mir b/llvm/test/CodeGen/Thumb/tbb-reuse.mir
index b18a68528c596..a085c31568a07 100644
--- a/llvm/test/CodeGen/Thumb/tbb-reuse.mir
+++ b/llvm/test/CodeGen/Thumb/tbb-reuse.mir
@@ -49,7 +49,7 @@
}
; Function Attrs: nounwind
- declare void @llvm.stackprotector(i8*, i8**) #0
+ declare void @llvm.stackprotector(ptr, ptr) #0
attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll b/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
index 99daf7e0acf5f..8ee7cd130bc70 100644
--- a/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
+++ b/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
@@ -1,37 +1,37 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -frame-pointer=all -mcpu=cortex-a8 | FileCheck %s
; rdar://10676853
-%struct.Dict_node_struct = type { i8*, %struct.Word_file_struct*, %struct.Exp_struct*, %struct.Dict_node_struct*, %struct.Dict_node_struct* }
-%struct.Word_file_struct = type { [60 x i8], i32, %struct.Word_file_struct* }
+%struct.Dict_node_struct = type { ptr, ptr, ptr, ptr, ptr }
+%struct.Word_file_struct = type { [60 x i8], i32, ptr }
%struct.Exp_struct = type { i8, i8, i8, i8, %union.anon }
-%union.anon = type { %struct.E_list_struct* }
-%struct.E_list_struct = type { %struct.E_list_struct*, %struct.Exp_struct* }
+%union.anon = type { ptr }
+%struct.E_list_struct = type { ptr, ptr }
-@lookup_list = external hidden unnamed_addr global %struct.Dict_node_struct*, align 4
+@lookup_list = external hidden unnamed_addr global ptr, align 4
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
-define hidden fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %dn, i8* nocapture %s) nounwind ssp {
+define hidden fastcc void @rdictionary_lookup(ptr %dn, ptr nocapture %s) nounwind ssp {
; CHECK-LABEL: rdictionary_lookup:
entry:
br label %tailrecurse
tailrecurse: ; preds = %if.then10, %entry
- %dn.tr = phi %struct.Dict_node_struct* [ %dn, %entry ], [ %9, %if.then10 ]
- %cmp = icmp eq %struct.Dict_node_struct* %dn.tr, null
+ %dn.tr = phi ptr [ %dn, %entry ], [ %9, %if.then10 ]
+ %cmp = icmp eq ptr %dn.tr, null
br i1 %cmp, label %if.end11, label %if.end
if.end: ; preds = %tailrecurse
- %string = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 0
- %0 = load i8*, i8** %string, align 4
+ %string = getelementptr inbounds %struct.Dict_node_struct, ptr %dn.tr, i32 0, i32 0
+ %0 = load ptr, ptr %string, align 4
br label %while.cond.i
while.cond.i: ; preds = %while.body.i, %if.end
- %1 = phi i8* [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ]
- %storemerge.i = phi i8* [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ]
- %2 = load i8, i8* %1, align 1
+ %1 = phi ptr [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ]
+ %storemerge.i = phi ptr [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ]
+ %2 = load i8, ptr %1, align 1
%cmp.i = icmp eq i8 %2, 0
- %.pre.i = load i8, i8* %storemerge.i, align 1
+ %.pre.i = load i8, ptr %storemerge.i, align 1
br i1 %cmp.i, label %lor.lhs.false.i, label %land.end.i
land.end.i: ; preds = %while.cond.i
@@ -39,8 +39,8 @@ land.end.i: ; preds = %while.cond.i
br i1 %cmp4.i, label %while.body.i, label %while.end.i
while.body.i: ; preds = %land.end.i
- %incdec.ptr.i = getelementptr inbounds i8, i8* %1, i32 1
- %incdec.ptr6.i = getelementptr inbounds i8, i8* %storemerge.i, i32 1
+ %incdec.ptr.i = getelementptr inbounds i8, ptr %1, i32 1
+ %incdec.ptr6.i = getelementptr inbounds i8, ptr %storemerge.i, i32 1
br label %while.cond.i
while.end.i: ; preds = %land.end.i
@@ -68,22 +68,22 @@ if.end3: ; preds = %dict_match.exit, %l
; CHECK: cmp
; CHECK-NOT: cbnz
%storemerge1.i3 = phi i32 [ %sub.i, %dict_match.exit ], [ 0, %lor.lhs.false.i ], [ 0, %while.end.i ]
- %right = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 4
- %4 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** %right, align 4
- tail call fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %4, i8* %s)
+ %right = getelementptr inbounds %struct.Dict_node_struct, ptr %dn.tr, i32 0, i32 4
+ %4 = load ptr, ptr %right, align 4
+ tail call fastcc void @rdictionary_lookup(ptr %4, ptr %s)
%cmp4 = icmp eq i32 %storemerge1.i3, 0
br i1 %cmp4, label %if.then5, label %if.end8
if.then5: ; preds = %if.end3
- %call6 = tail call fastcc i8* @xalloc(i32 20)
- %5 = bitcast i8* %call6 to %struct.Dict_node_struct*
- %6 = bitcast %struct.Dict_node_struct* %dn.tr to i8*
- tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %call6, i8* align 4 %6, i32 16, i1 false)
- %7 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** @lookup_list, align 4
- %right7 = getelementptr inbounds i8, i8* %call6, i32 16
- %8 = bitcast i8* %right7 to %struct.Dict_node_struct**
- store %struct.Dict_node_struct* %7, %struct.Dict_node_struct** %8, align 4
- store %struct.Dict_node_struct* %5, %struct.Dict_node_struct** @lookup_list, align 4
+ %call6 = tail call fastcc ptr @xalloc(i32 20)
+ %5 = bitcast ptr %call6 to ptr
+ %6 = bitcast ptr %dn.tr to ptr
+ tail call void @llvm.memcpy.p0.p0.i32(ptr align 4 %call6, ptr align 4 %6, i32 16, i1 false)
+ %7 = load ptr, ptr @lookup_list, align 4
+ %right7 = getelementptr inbounds i8, ptr %call6, i32 16
+ %8 = bitcast ptr %right7 to ptr
+ store ptr %7, ptr %8, align 4
+ store ptr %5, ptr @lookup_list, align 4
br label %if.then10
if.end8: ; preds = %if.end3
@@ -91,8 +91,8 @@ if.end8: ; preds = %if.end3
br i1 %cmp9, label %if.then10, label %if.end11
if.then10: ; preds = %if.end8, %if.then5, %dict_match.exit
- %left = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 3
- %9 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** %left, align 4
+ %left = getelementptr inbounds %struct.Dict_node_struct, ptr %dn.tr, i32 0, i32 3
+ %9 = load ptr, ptr %left, align 4
br label %tailrecurse
if.end11: ; preds = %if.end8, %tailrecurse
@@ -100,4 +100,4 @@ if.end11: ; preds = %if.end8, %tailrecur
}
; Materializable
-declare hidden fastcc i8* @xalloc(i32) nounwind ssp
+declare hidden fastcc ptr @xalloc(i32) nounwind ssp
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir
index ecaf68d90a954..84ff0d78993d5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define hidden i32 @max_min_add_reduce(i8* %input_1_vect, i8* %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, i32* %output, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 {
+ define hidden i32 @max_min_add_reduce(ptr %input_1_vect, ptr %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, ptr %output, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 {
entry:
%add = add i32 %block_size, 3
%div = lshr i32 %add, 2
@@ -14,7 +14,7 @@
%.splat.i42 = shufflevector <4 x i32> %.splatinsert.i41, <4 x i32> undef, <4 x i32> zeroinitializer
%.splatinsert.i = insertelement <4 x i32> undef, i32 %out_activation_max, i32 0
%.splat.i = shufflevector <4 x i32> %.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
- %scevgep = getelementptr i32, i32* %output, i32 -1
+ %scevgep = getelementptr i32, ptr %output, i32 -1
br label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
@@ -22,21 +22,21 @@
for.body: ; preds = %for.body, %for.body.lr.ph
%lsr.iv3 = phi i32 [ %lsr.iv.next, %for.body ], [ %div, %for.body.lr.ph ]
- %lsr.iv = phi i32* [ %scevgep1, %for.body ], [ %scevgep, %for.body.lr.ph ]
- %input_1_vect.addr.052 = phi i8* [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ]
- %input_2_vect.addr.051 = phi i8* [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ]
+ %lsr.iv = phi ptr [ %scevgep1, %for.body ], [ %scevgep, %for.body.lr.ph ]
+ %input_1_vect.addr.052 = phi ptr [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ]
+ %input_2_vect.addr.051 = phi ptr [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ]
%num_elements.049 = phi i32 [ %block_size, %for.body.lr.ph ], [ %sub, %for.body ]
- %input_2_cast = bitcast i8* %input_2_vect.addr.051 to <4 x i32>*
- %input_1_cast = bitcast i8* %input_1_vect.addr.052 to <4 x i32>*
- %scevgep2 = getelementptr i32, i32* %lsr.iv, i32 1
+ %input_2_cast = bitcast ptr %input_2_vect.addr.051 to ptr
+ %input_1_cast = bitcast ptr %input_1_vect.addr.052 to ptr
+ %scevgep2 = getelementptr i32, ptr %lsr.iv, i32 1
%pred = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %num_elements.049)
- %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
+ %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
%insert.input_1_offset = insertelement <4 x i32> undef, i32 %input_1_offset, i32 0
%splat.input_1_offset = shufflevector <4 x i32> %insert.input_1_offset, <4 x i32> undef, <4 x i32> zeroinitializer
%insert.input_2_offset = insertelement <4 x i32> undef, i32 %input_2_offset, i32 0
%splat.input_2_offset = shufflevector <4 x i32> %insert.input_2_offset, <4 x i32> undef, <4 x i32> zeroinitializer
%add.1 = add <4 x i32> %load.1, %splat.input_1_offset
- %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
+ %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
%add.2 = add <4 x i32> %load.2, %splat.input_2_offset
%mul = mul <4 x i32> %add.1, %add.2
%insert.output = insertelement <4 x i32> undef, i32 %out_offset, i32 0
@@ -45,19 +45,19 @@
%max = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %add7, <4 x i32> %.splat.i42, i32 1, <4 x i1> %pred, <4 x i32> undef)
%min = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %max, <4 x i32> %.splat.i, i32 1, <4 x i1> %pred, <4 x i32> undef)
%reduce = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %min)
- store i32 %reduce, i32* %scevgep2
- %add.ptr = getelementptr inbounds i8, i8* %input_1_vect.addr.052, i32 4
- %add.ptr14 = getelementptr inbounds i8, i8* %input_2_vect.addr.051, i32 4
+ store i32 %reduce, ptr %scevgep2
+ %add.ptr = getelementptr inbounds i8, ptr %input_1_vect.addr.052, i32 4
+ %add.ptr14 = getelementptr inbounds i8, ptr %input_2_vect.addr.051, i32 4
%sub = add i32 %num_elements.049, -4
%iv.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv3, i32 1)
%cmp = icmp ne i32 %iv.next, 0
- %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
+ %scevgep1 = getelementptr i32, ptr %lsr.iv, i32 1
%lsr.iv.next = add i32 %lsr.iv3, -1
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #3
declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1
declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1
declare i1 @llvm.test.set.loop.iterations.i32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
index dcb57a5ea6afd..13080fcfa1357 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
-define void @arm_cmplx_dot_prod_f32(float* %pSrcA, float* %pSrcB, i32 %numSamples, float* nocapture %realResult, float* nocapture %imagResult) {
+define void @arm_cmplx_dot_prod_f32(ptr %pSrcA, ptr %pSrcB, i32 %numSamples, ptr nocapture %realResult, ptr nocapture %imagResult) {
; CHECK-LABEL: arm_cmplx_dot_prod_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
@@ -84,51 +84,51 @@ entry:
br i1 %cmp, label %while.body.preheader, label %if.else
while.body.preheader: ; preds = %entry
- %vecSrcA.0.in108 = bitcast float* %pSrcA to <4 x float>*
- %vecSrcA.0109 = load <4 x float>, <4 x float>* %vecSrcA.0.in108, align 4
- %vecSrcB.0.in106 = bitcast float* %pSrcB to <4 x float>*
- %vecSrcB.0107 = load <4 x float>, <4 x float>* %vecSrcB.0.in106, align 4
- %pSrcB.addr.0105 = getelementptr inbounds float, float* %pSrcB, i32 4
- %pSrcA.addr.0104 = getelementptr inbounds float, float* %pSrcA, i32 4
+ %vecSrcA.0.in108 = bitcast ptr %pSrcA to ptr
+ %vecSrcA.0109 = load <4 x float>, ptr %vecSrcA.0.in108, align 4
+ %vecSrcB.0.in106 = bitcast ptr %pSrcB to ptr
+ %vecSrcB.0107 = load <4 x float>, ptr %vecSrcB.0.in106, align 4
+ %pSrcB.addr.0105 = getelementptr inbounds float, ptr %pSrcB, i32 4
+ %pSrcA.addr.0104 = getelementptr inbounds float, ptr %pSrcA, i32 4
%shr = lshr i32 %numSamples, 2
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
%vecSrcA.0118 = phi <4 x float> [ %vecSrcA.0, %while.body ], [ %vecSrcA.0109, %while.body.preheader ]
%vecSrcB.0117 = phi <4 x float> [ %vecSrcB.0, %while.body ], [ %vecSrcB.0107, %while.body.preheader ]
- %pSrcB.addr.0116 = phi float* [ %pSrcB.addr.0, %while.body ], [ %pSrcB.addr.0105, %while.body.preheader ]
- %pSrcA.addr.0115 = phi float* [ %pSrcA.addr.0, %while.body ], [ %pSrcA.addr.0104, %while.body.preheader ]
+ %pSrcB.addr.0116 = phi ptr [ %pSrcB.addr.0, %while.body ], [ %pSrcB.addr.0105, %while.body.preheader ]
+ %pSrcA.addr.0115 = phi ptr [ %pSrcA.addr.0, %while.body ], [ %pSrcA.addr.0104, %while.body.preheader ]
%vec_acc.0114 = phi <4 x float> [ %7, %while.body ], [ zeroinitializer, %while.body.preheader ]
- %vecSrcB.0.in.in113 = phi float* [ %add.ptr4, %while.body ], [ %pSrcB, %while.body.preheader ]
- %vecSrcA.0.in.in112 = phi float* [ %add.ptr3, %while.body ], [ %pSrcA, %while.body.preheader ]
+ %vecSrcB.0.in.in113 = phi ptr [ %add.ptr4, %while.body ], [ %pSrcB, %while.body.preheader ]
+ %vecSrcA.0.in.in112 = phi ptr [ %add.ptr3, %while.body ], [ %pSrcA, %while.body.preheader ]
%blkCnt.0.in111 = phi i32 [ %blkCnt.0, %while.body ], [ %shr, %while.body.preheader ]
%blkCnt.0 = add nsw i32 %blkCnt.0.in111, -1
%0 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> %vec_acc.0114, <4 x float> %vecSrcA.0118, <4 x float> %vecSrcB.0117)
- %1 = bitcast float* %pSrcA.addr.0115 to <4 x float>*
- %2 = load <4 x float>, <4 x float>* %1, align 4
- %add.ptr3 = getelementptr inbounds float, float* %vecSrcA.0.in.in112, i32 8
+ %1 = bitcast ptr %pSrcA.addr.0115 to ptr
+ %2 = load <4 x float>, ptr %1, align 4
+ %add.ptr3 = getelementptr inbounds float, ptr %vecSrcA.0.in.in112, i32 8
%3 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %0, <4 x float> %vecSrcA.0118, <4 x float> %vecSrcB.0117)
- %4 = bitcast float* %pSrcB.addr.0116 to <4 x float>*
- %5 = load <4 x float>, <4 x float>* %4, align 4
- %add.ptr4 = getelementptr inbounds float, float* %vecSrcB.0.in.in113, i32 8
+ %4 = bitcast ptr %pSrcB.addr.0116 to ptr
+ %5 = load <4 x float>, ptr %4, align 4
+ %add.ptr4 = getelementptr inbounds float, ptr %vecSrcB.0.in.in113, i32 8
%6 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> %3, <4 x float> %2, <4 x float> %5)
%7 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %6, <4 x float> %2, <4 x float> %5)
- %pSrcA.addr.0 = getelementptr inbounds float, float* %vecSrcA.0.in.in112, i32 12
- %pSrcB.addr.0 = getelementptr inbounds float, float* %vecSrcB.0.in.in113, i32 12
- %vecSrcB.0.in = bitcast float* %add.ptr4 to <4 x float>*
- %vecSrcB.0 = load <4 x float>, <4 x float>* %vecSrcB.0.in, align 4
- %vecSrcA.0.in = bitcast float* %add.ptr3 to <4 x float>*
- %vecSrcA.0 = load <4 x float>, <4 x float>* %vecSrcA.0.in, align 4
+ %pSrcA.addr.0 = getelementptr inbounds float, ptr %vecSrcA.0.in.in112, i32 12
+ %pSrcB.addr.0 = getelementptr inbounds float, ptr %vecSrcB.0.in.in113, i32 12
+ %vecSrcB.0.in = bitcast ptr %add.ptr4 to ptr
+ %vecSrcB.0 = load <4 x float>, ptr %vecSrcB.0.in, align 4
+ %vecSrcA.0.in = bitcast ptr %add.ptr3 to ptr
+ %vecSrcA.0 = load <4 x float>, ptr %vecSrcA.0.in, align 4
%cmp2 = icmp sgt i32 %blkCnt.0.in111, 2
br i1 %cmp2, label %while.body, label %while.end
while.end: ; preds = %while.body
%8 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> %7, <4 x float> %vecSrcA.0, <4 x float> %vecSrcB.0)
- %9 = bitcast float* %pSrcA.addr.0 to <4 x float>*
- %10 = load <4 x float>, <4 x float>* %9, align 4
+ %9 = bitcast ptr %pSrcA.addr.0 to ptr
+ %10 = load <4 x float>, ptr %9, align 4
%11 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %8, <4 x float> %vecSrcA.0, <4 x float> %vecSrcB.0)
- %12 = bitcast float* %pSrcB.addr.0 to <4 x float>*
- %13 = load <4 x float>, <4 x float>* %12, align 4
+ %12 = bitcast ptr %pSrcB.addr.0 to ptr
+ %13 = load <4 x float>, ptr %12, align 4
%14 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> %11, <4 x float> %10, <4 x float> %13)
%15 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %14, <4 x float> %10, <4 x float> %13)
%and = shl i32 %numSamples, 1
@@ -138,12 +138,12 @@ while.end: ; preds = %while.body
while.body9: ; preds = %while.end
%16 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %mul)
- %add.ptr10 = getelementptr inbounds float, float* %vecSrcA.0.in.in112, i32 16
- %add.ptr11 = getelementptr inbounds float, float* %vecSrcB.0.in.in113, i32 16
- %17 = bitcast float* %add.ptr10 to <4 x float>*
- %18 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* nonnull %17, i32 4, <4 x i1> %16, <4 x float> zeroinitializer)
- %19 = bitcast float* %add.ptr11 to <4 x float>*
- %20 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* nonnull %19, i32 4, <4 x i1> %16, <4 x float> zeroinitializer)
+ %add.ptr10 = getelementptr inbounds float, ptr %vecSrcA.0.in.in112, i32 16
+ %add.ptr11 = getelementptr inbounds float, ptr %vecSrcB.0.in.in113, i32 16
+ %17 = bitcast ptr %add.ptr10 to ptr
+ %18 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %17, i32 4, <4 x i1> %16, <4 x float> zeroinitializer)
+ %19 = bitcast ptr %add.ptr11 to ptr
+ %20 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %19, i32 4, <4 x i1> %16, <4 x float> zeroinitializer)
%21 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 0, <4 x float> %15, <4 x float> %18, <4 x float> %20, <4 x i1> %16)
%22 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 1, <4 x float> %21, <4 x float> %18, <4 x float> %20, <4 x i1> %16)
%cmp8 = icmp ugt i32 %mul, 4
@@ -156,17 +156,17 @@ if.else: ; preds = %entry
do.body: ; preds = %do.body, %if.else
%blkCnt.2 = phi i32 [ %mul14, %if.else ], [ %sub18, %do.body ]
%vec_acc.2 = phi <4 x float> [ zeroinitializer, %if.else ], [ %29, %do.body ]
- %pSrcB.addr.2 = phi float* [ %pSrcB, %if.else ], [ %add.ptr17, %do.body ]
- %pSrcA.addr.2 = phi float* [ %pSrcA, %if.else ], [ %add.ptr16, %do.body ]
+ %pSrcB.addr.2 = phi ptr [ %pSrcB, %if.else ], [ %add.ptr17, %do.body ]
+ %pSrcA.addr.2 = phi ptr [ %pSrcA, %if.else ], [ %add.ptr16, %do.body ]
%23 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.2)
- %24 = bitcast float* %pSrcA.addr.2 to <4 x float>*
- %25 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %24, i32 4, <4 x i1> %23, <4 x float> zeroinitializer)
- %26 = bitcast float* %pSrcB.addr.2 to <4 x float>*
- %27 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %26, i32 4, <4 x i1> %23, <4 x float> zeroinitializer)
+ %24 = bitcast ptr %pSrcA.addr.2 to ptr
+ %25 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %24, i32 4, <4 x i1> %23, <4 x float> zeroinitializer)
+ %26 = bitcast ptr %pSrcB.addr.2 to ptr
+ %27 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %26, i32 4, <4 x i1> %23, <4 x float> zeroinitializer)
%28 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 0, <4 x float> %vec_acc.2, <4 x float> %25, <4 x float> %27, <4 x i1> %23)
%29 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 1, <4 x float> %28, <4 x float> %25, <4 x float> %27, <4 x i1> %23)
- %add.ptr16 = getelementptr inbounds float, float* %pSrcA.addr.2, i32 4
- %add.ptr17 = getelementptr inbounds float, float* %pSrcB.addr.2, i32 4
+ %add.ptr16 = getelementptr inbounds float, ptr %pSrcA.addr.2, i32 4
+ %add.ptr17 = getelementptr inbounds float, ptr %pSrcB.addr.2, i32 4
%sub18 = add nsw i32 %blkCnt.2, -4
%cmp19 = icmp sgt i32 %blkCnt.2, 4
br i1 %cmp19, label %do.body, label %if.end
@@ -179,19 +179,19 @@ if.end: ; preds = %do.body, %while.bod
%32 = extractelement <4 x float> %vec_acc.3, i32 1
%33 = extractelement <4 x float> %vec_acc.3, i32 3
%add20 = fadd fast float %32, %33
- store float %add, float* %realResult, align 4
- store float %add20, float* %imagResult, align 4
+ store float %add, ptr %realResult, align 4
+ store float %add20, ptr %imagResult, align 4
ret void
while.body9.1: ; preds = %while.body9
%sub12 = add nsw i32 %mul, -4
%34 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %sub12)
- %add.ptr10.1 = getelementptr inbounds float, float* %vecSrcA.0.in.in112, i32 20
- %add.ptr11.1 = getelementptr inbounds float, float* %vecSrcB.0.in.in113, i32 20
- %35 = bitcast float* %add.ptr10.1 to <4 x float>*
- %36 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* nonnull %35, i32 4, <4 x i1> %34, <4 x float> zeroinitializer)
- %37 = bitcast float* %add.ptr11.1 to <4 x float>*
- %38 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* nonnull %37, i32 4, <4 x i1> %34, <4 x float> zeroinitializer)
+ %add.ptr10.1 = getelementptr inbounds float, ptr %vecSrcA.0.in.in112, i32 20
+ %add.ptr11.1 = getelementptr inbounds float, ptr %vecSrcB.0.in.in113, i32 20
+ %35 = bitcast ptr %add.ptr10.1 to ptr
+ %36 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %35, i32 4, <4 x i1> %34, <4 x float> zeroinitializer)
+ %37 = bitcast ptr %add.ptr11.1 to ptr
+ %38 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %37, i32 4, <4 x i1> %34, <4 x float> zeroinitializer)
%39 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 0, <4 x float> %22, <4 x float> %36, <4 x float> %38, <4 x i1> %34)
%40 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 1, <4 x float> %39, <4 x float> %36, <4 x float> %38, <4 x i1> %34)
br label %if.end
@@ -199,5 +199,5 @@ while.body9.1: ; preds = %while.body9
declare <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32, <4 x float>, <4 x float>, <4 x float>) #1
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #2
declare <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32, <4 x float>, <4 x float>, <4 x float>, <4 x i1>) #1
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
index dabebf4aeb77a..40231e1355974 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
@@ -2,53 +2,53 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- %struct.arm_biquad_casd_df1_inst_q31 = type { i32*, i32*, i32, i32 }
+ %struct.arm_biquad_casd_df1_inst_q31 = type { ptr, ptr, i32, i32 }
- define hidden void @arm_biquad_cascade_df1_q31(%struct.arm_biquad_casd_df1_inst_q31* nocapture readonly %arg, i32* nocapture readonly %arg1, i32* nocapture %arg2, i32 %arg3) {
+ define hidden void @arm_biquad_cascade_df1_q31(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr nocapture %arg2, i32 %arg3) {
bb:
- %i = bitcast %struct.arm_biquad_casd_df1_inst_q31* %arg to i32**
- %i4 = load i32*, i32** %i, align 4
- %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 1
- %i6 = load i32*, i32** %i5, align 4
- %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 2
- %i8 = load i32, i32* %i7, align 4
+ %i = bitcast ptr %arg to ptr
+ %i4 = load ptr, ptr %i, align 4
+ %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 1
+ %i6 = load ptr, ptr %i5, align 4
+ %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 2
+ %i8 = load i32, ptr %i7, align 4
%i9 = sub i32 31, %i8
- %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 3
- %i11 = load i32, i32* %i10, align 4
+ %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 3
+ %i11 = load i32, ptr %i10, align 4
br label %bb12
bb12: ; preds = %bb74, %bb
- %i13 = phi i32* [ %i6, %bb ], [ %i18, %bb74 ]
- %i14 = phi i32* [ %i4, %bb ], [ %i85, %bb74 ]
- %i15 = phi i32* [ %arg1, %bb ], [ %arg2, %bb74 ]
+ %i13 = phi ptr [ %i6, %bb ], [ %i18, %bb74 ]
+ %i14 = phi ptr [ %i4, %bb ], [ %i85, %bb74 ]
+ %i15 = phi ptr [ %arg1, %bb ], [ %arg2, %bb74 ]
%i16 = phi i32 [ %i11, %bb ], [ %i89, %bb74 ]
- %i18 = getelementptr inbounds i32, i32* %i13, i32 5
- %i19 = load i32, i32* %i14, align 4
- %i20 = getelementptr inbounds i32, i32* %i14, i32 1
- %i21 = load i32, i32* %i20, align 4
- %i22 = getelementptr inbounds i32, i32* %i14, i32 2
- %i23 = load i32, i32* %i22, align 4
- %i24 = getelementptr inbounds i32, i32* %i14, i32 3
- %i25 = load i32, i32* %i24, align 4
+ %i18 = getelementptr inbounds i32, ptr %i13, i32 5
+ %i19 = load i32, ptr %i14, align 4
+ %i20 = getelementptr inbounds i32, ptr %i14, i32 1
+ %i21 = load i32, ptr %i20, align 4
+ %i22 = getelementptr inbounds i32, ptr %i14, i32 2
+ %i23 = load i32, ptr %i22, align 4
+ %i24 = getelementptr inbounds i32, ptr %i14, i32 3
+ %i25 = load i32, ptr %i24, align 4
%i26 = call i1 @llvm.test.set.loop.iterations.i32(i32 %arg3)
br i1 %i26, label %bb27, label %bb74
bb27: ; preds = %bb12
- %i28 = getelementptr inbounds i32, i32* %i13, i32 4
- %i29 = load i32, i32* %i28, align 4
- %i30 = getelementptr inbounds i32, i32* %i13, i32 3
- %i31 = load i32, i32* %i30, align 4
- %i32 = getelementptr inbounds i32, i32* %i13, i32 2
- %i33 = load i32, i32* %i32, align 4
- %i34 = getelementptr inbounds i32, i32* %i13, i32 1
- %i35 = load i32, i32* %i34, align 4
- %i36 = load i32, i32* %i13, align 4
+ %i28 = getelementptr inbounds i32, ptr %i13, i32 4
+ %i29 = load i32, ptr %i28, align 4
+ %i30 = getelementptr inbounds i32, ptr %i13, i32 3
+ %i31 = load i32, ptr %i30, align 4
+ %i32 = getelementptr inbounds i32, ptr %i13, i32 2
+ %i33 = load i32, ptr %i32, align 4
+ %i34 = getelementptr inbounds i32, ptr %i13, i32 1
+ %i35 = load i32, ptr %i34, align 4
+ %i36 = load i32, ptr %i13, align 4
br label %bb37
bb37: ; preds = %bb37, %bb27
%lsr.iv = phi i32 [ %lsr.iv.next, %bb37 ], [ %arg3, %bb27 ]
- %i38 = phi i32* [ %i15, %bb27 ], [ %i51, %bb37 ]
- %i39 = phi i32* [ %arg2, %bb27 ], [ %i69, %bb37 ]
+ %i38 = phi ptr [ %i15, %bb27 ], [ %i51, %bb37 ]
+ %i39 = phi ptr [ %arg2, %bb27 ], [ %i69, %bb37 ]
%i40 = phi i32 [ %i25, %bb27 ], [ %i41, %bb37 ]
%i41 = phi i32 [ %i23, %bb27 ], [ %i68, %bb37 ]
%i42 = phi i32 [ %i21, %bb27 ], [ %i43, %bb37 ]
@@ -59,8 +59,8 @@
%i48 = sext i32 %i35 to i64
%i49 = sext i32 %i36 to i64
%i50 = zext i32 %i9 to i64
- %i51 = getelementptr inbounds i32, i32* %i38, i32 1
- %i52 = load i32, i32* %i38, align 4
+ %i51 = getelementptr inbounds i32, ptr %i38, i32 1
+ %i52 = load i32, ptr %i38, align 4
%i53 = sext i32 %i52 to i64
%i54 = mul nsw i64 %i53, %i49
%i55 = sext i32 %i43 to i64
@@ -77,8 +77,8 @@
%i66 = add i64 %i65, %i54
%i67 = ashr i64 %i66, %i50
%i68 = trunc i64 %i67 to i32
- %i69 = getelementptr inbounds i32, i32* %i39, i32 1
- store i32 %i68, i32* %i39, align 4
+ %i69 = getelementptr inbounds i32, ptr %i39, i32 1
+ store i32 %i68, ptr %i39, align 4
%i70 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1)
%i71 = icmp ne i32 %i70, 0
%lsr.iv.next = add i32 %lsr.iv, -1
@@ -93,20 +93,20 @@
%i76 = phi i32 [ %i21, %bb12 ], [ %i43, %bb72 ]
%i77 = phi i32 [ %i23, %bb12 ], [ %i73, %bb72 ]
%i78 = phi i32 [ %i25, %bb12 ], [ %i41, %bb72 ]
- store i32 %i75, i32* %i14, align 4
- %i79 = bitcast i32* %i14 to i8*
- %i80 = getelementptr inbounds i8, i8* %i79, i32 4
- %i81 = bitcast i8* %i80 to i32*
- store i32 %i76, i32* %i81, align 4
- %i82 = bitcast i32* %i14 to i8*
- %i83 = getelementptr inbounds i8, i8* %i82, i32 8
- %i84 = bitcast i8* %i83 to i32*
- store i32 %i77, i32* %i84, align 4
- %i85 = getelementptr inbounds i32, i32* %i14, i32 4
- %i86 = bitcast i32* %i14 to i8*
- %i87 = getelementptr inbounds i8, i8* %i86, i32 12
- %i88 = bitcast i8* %i87 to i32*
- store i32 %i78, i32* %i88, align 4
+ store i32 %i75, ptr %i14, align 4
+ %i79 = bitcast ptr %i14 to ptr
+ %i80 = getelementptr inbounds i8, ptr %i79, i32 4
+ %i81 = bitcast ptr %i80 to ptr
+ store i32 %i76, ptr %i81, align 4
+ %i82 = bitcast ptr %i14 to ptr
+ %i83 = getelementptr inbounds i8, ptr %i82, i32 8
+ %i84 = bitcast ptr %i83 to ptr
+ store i32 %i77, ptr %i84, align 4
+ %i85 = getelementptr inbounds i32, ptr %i14, i32 4
+ %i86 = bitcast ptr %i14 to ptr
+ %i87 = getelementptr inbounds i8, ptr %i86, i32 12
+ %i88 = bitcast ptr %i87 to ptr
+ store i32 %i78, ptr %i88, align 4
%i89 = add i32 %i16, -1
%i90 = icmp eq i32 %i89, 0
br i1 %i90, label %bb91, label %bb12
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
index 10574ba7320e6..789429abf7782 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
@@ -1,56 +1,56 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - -verify-machineinstrs | FileCheck %s
--- |
- %struct.arm_biquad_casd_df1_inst_q31 = type { i32*, i32*, i32, i32 }
+ %struct.arm_biquad_casd_df1_inst_q31 = type { ptr, ptr, i32, i32 }
; Function Attrs: optsize
- define hidden void @arm_biquad_cascade_df1_q31(%struct.arm_biquad_casd_df1_inst_q31* nocapture readonly %arg, i32* nocapture readonly %arg1, i32* nocapture %arg2, i32 %arg3) #0 {
+ define hidden void @arm_biquad_cascade_df1_q31(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr nocapture %arg2, i32 %arg3) #0 {
bb:
- %i = bitcast %struct.arm_biquad_casd_df1_inst_q31* %arg to i32**
- %i4 = load i32*, i32** %i, align 4
- %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 1
- %i6 = load i32*, i32** %i5, align 4
- %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 2
- %i8 = load i32, i32* %i7, align 4
+ %i = bitcast ptr %arg to ptr
+ %i4 = load ptr, ptr %i, align 4
+ %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 1
+ %i6 = load ptr, ptr %i5, align 4
+ %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 2
+ %i8 = load i32, ptr %i7, align 4
%i9 = sub i32 31, %i8
- %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 3
- %i11 = load i32, i32* %i10, align 4
+ %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 3
+ %i11 = load i32, ptr %i10, align 4
br label %bb12
bb12: ; preds = %bb74, %bb
- %i13 = phi i32* [ %i6, %bb ], [ %i18, %bb74 ]
- %i14 = phi i32* [ %i4, %bb ], [ %i85, %bb74 ]
- %i15 = phi i32* [ %arg1, %bb ], [ %arg2, %bb74 ]
+ %i13 = phi ptr [ %i6, %bb ], [ %i18, %bb74 ]
+ %i14 = phi ptr [ %i4, %bb ], [ %i85, %bb74 ]
+ %i15 = phi ptr [ %arg1, %bb ], [ %arg2, %bb74 ]
%i16 = phi i32 [ %i11, %bb ], [ %i89, %bb74 ]
- %i18 = getelementptr inbounds i32, i32* %i13, i32 5
- %i19 = load i32, i32* %i14, align 4
- %i20 = getelementptr inbounds i32, i32* %i14, i32 1
- %i21 = load i32, i32* %i20, align 4
- %i22 = getelementptr inbounds i32, i32* %i14, i32 2
- %i23 = load i32, i32* %i22, align 4
- %i24 = getelementptr inbounds i32, i32* %i14, i32 3
- %i25 = load i32, i32* %i24, align 4
+ %i18 = getelementptr inbounds i32, ptr %i13, i32 5
+ %i19 = load i32, ptr %i14, align 4
+ %i20 = getelementptr inbounds i32, ptr %i14, i32 1
+ %i21 = load i32, ptr %i20, align 4
+ %i22 = getelementptr inbounds i32, ptr %i14, i32 2
+ %i23 = load i32, ptr %i22, align 4
+ %i24 = getelementptr inbounds i32, ptr %i14, i32 3
+ %i25 = load i32, ptr %i24, align 4
%i26 = call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %arg3)
%i26.0 = extractvalue { i32, i1 } %i26, 0
%i26.1 = extractvalue { i32, i1 } %i26, 1
br i1 %i26.1, label %bb27, label %bb74
bb27: ; preds = %bb12
- %i28 = getelementptr inbounds i32, i32* %i13, i32 4
- %i29 = load i32, i32* %i28, align 4
- %i30 = getelementptr inbounds i32, i32* %i13, i32 3
- %i31 = load i32, i32* %i30, align 4
- %i32 = getelementptr inbounds i32, i32* %i13, i32 2
- %i33 = load i32, i32* %i32, align 4
- %i34 = getelementptr inbounds i32, i32* %i13, i32 1
- %i35 = load i32, i32* %i34, align 4
- %i36 = load i32, i32* %i13, align 4
+ %i28 = getelementptr inbounds i32, ptr %i13, i32 4
+ %i29 = load i32, ptr %i28, align 4
+ %i30 = getelementptr inbounds i32, ptr %i13, i32 3
+ %i31 = load i32, ptr %i30, align 4
+ %i32 = getelementptr inbounds i32, ptr %i13, i32 2
+ %i33 = load i32, ptr %i32, align 4
+ %i34 = getelementptr inbounds i32, ptr %i13, i32 1
+ %i35 = load i32, ptr %i34, align 4
+ %i36 = load i32, ptr %i13, align 4
br label %bb37
bb37: ; preds = %bb37, %bb27
%lsr.iv = phi i32 [ %i70, %bb37 ], [ %i26.0, %bb27 ]
- %i38 = phi i32* [ %i15, %bb27 ], [ %i51, %bb37 ]
- %i39 = phi i32* [ %arg2, %bb27 ], [ %i69, %bb37 ]
+ %i38 = phi ptr [ %i15, %bb27 ], [ %i51, %bb37 ]
+ %i39 = phi ptr [ %arg2, %bb27 ], [ %i69, %bb37 ]
%i40 = phi i32 [ %i25, %bb27 ], [ %i41, %bb37 ]
%i41 = phi i32 [ %i23, %bb27 ], [ %i68, %bb37 ]
%i42 = phi i32 [ %i21, %bb27 ], [ %i43, %bb37 ]
@@ -61,8 +61,8 @@
%i48 = sext i32 %i35 to i64
%i49 = sext i32 %i36 to i64
%i50 = zext i32 %i9 to i64
- %i51 = getelementptr inbounds i32, i32* %i38, i32 1
- %i52 = load i32, i32* %i38, align 4
+ %i51 = getelementptr inbounds i32, ptr %i38, i32 1
+ %i52 = load i32, ptr %i38, align 4
%i53 = sext i32 %i52 to i64
%i54 = mul nsw i64 %i53, %i49
%i55 = sext i32 %i43 to i64
@@ -79,8 +79,8 @@
%i66 = add i64 %i65, %i54
%i67 = ashr i64 %i66, %i50
%i68 = trunc i64 %i67 to i32
- %i69 = getelementptr inbounds i32, i32* %i39, i32 1
- store i32 %i68, i32* %i39, align 4
+ %i69 = getelementptr inbounds i32, ptr %i39, i32 1
+ store i32 %i68, ptr %i39, align 4
%i70 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1)
%i71 = icmp ne i32 %i70, 0
br i1 %i71, label %bb37, label %bb72
@@ -94,20 +94,20 @@
%i76 = phi i32 [ %i21, %bb12 ], [ %i43, %bb72 ]
%i77 = phi i32 [ %i23, %bb12 ], [ %i73, %bb72 ]
%i78 = phi i32 [ %i25, %bb12 ], [ %i41, %bb72 ]
- store i32 %i75, i32* %i14, align 4
- %i79 = bitcast i32* %i14 to i8*
- %i80 = getelementptr inbounds i8, i8* %i79, i32 4
- %i81 = bitcast i8* %i80 to i32*
- store i32 %i76, i32* %i81, align 4
- %i82 = bitcast i32* %i14 to i8*
- %i83 = getelementptr inbounds i8, i8* %i82, i32 8
- %i84 = bitcast i8* %i83 to i32*
- store i32 %i77, i32* %i84, align 4
- %i85 = getelementptr inbounds i32, i32* %i14, i32 4
- %i86 = bitcast i32* %i14 to i8*
- %i87 = getelementptr inbounds i8, i8* %i86, i32 12
- %i88 = bitcast i8* %i87 to i32*
- store i32 %i78, i32* %i88, align 4
+ store i32 %i75, ptr %i14, align 4
+ %i79 = bitcast ptr %i14 to ptr
+ %i80 = getelementptr inbounds i8, ptr %i79, i32 4
+ %i81 = bitcast ptr %i80 to ptr
+ store i32 %i76, ptr %i81, align 4
+ %i82 = bitcast ptr %i14 to ptr
+ %i83 = getelementptr inbounds i8, ptr %i82, i32 8
+ %i84 = bitcast ptr %i83 to ptr
+ store i32 %i77, ptr %i84, align 4
+ %i85 = getelementptr inbounds i32, ptr %i14, i32 4
+ %i86 = bitcast ptr %i14 to ptr
+ %i87 = getelementptr inbounds i8, ptr %i86, i32 12
+ %i88 = bitcast ptr %i87 to ptr
+ store i32 %i78, ptr %i88, align 4
%i89 = add i32 %i16, -1
%i90 = icmp eq i32 %i89, 0
br i1 %i90, label %bb91, label %bb12
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir
index 62a266e3468b3..3a8765f48cc52 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir
@@ -2,54 +2,54 @@
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- %struct.arm_biquad_casd_df1_inst_q31 = type { i32*, i32*, i32, i32 }
+ %struct.arm_biquad_casd_df1_inst_q31 = type { ptr, ptr, i32, i32 }
; Function Attrs: optsize
- define hidden void @arm_biquad_cascade_df1_q31(%struct.arm_biquad_casd_df1_inst_q31* nocapture readonly %arg, i32* nocapture readonly %arg1, i32* nocapture %arg2, i32 %arg3) #0 {
+ define hidden void @arm_biquad_cascade_df1_q31(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr nocapture %arg2, i32 %arg3) #0 {
bb:
- %i = bitcast %struct.arm_biquad_casd_df1_inst_q31* %arg to i32**
- %i4 = load i32*, i32** %i, align 4
- %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 1
- %i6 = load i32*, i32** %i5, align 4
- %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 2
- %i8 = load i32, i32* %i7, align 4
+ %i = bitcast ptr %arg to ptr
+ %i4 = load ptr, ptr %i, align 4
+ %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 1
+ %i6 = load ptr, ptr %i5, align 4
+ %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 2
+ %i8 = load i32, ptr %i7, align 4
%i9 = sub i32 31, %i8
- %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 3
- %i11 = load i32, i32* %i10, align 4
+ %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 3
+ %i11 = load i32, ptr %i10, align 4
br label %bb12
bb12: ; preds = %bb74, %bb
- %i13 = phi i32* [ %i6, %bb ], [ %i18, %bb74 ]
- %i14 = phi i32* [ %i4, %bb ], [ %i85, %bb74 ]
- %i15 = phi i32* [ %arg1, %bb ], [ %arg2, %bb74 ]
+ %i13 = phi ptr [ %i6, %bb ], [ %i18, %bb74 ]
+ %i14 = phi ptr [ %i4, %bb ], [ %i85, %bb74 ]
+ %i15 = phi ptr [ %arg1, %bb ], [ %arg2, %bb74 ]
%i16 = phi i32 [ %i11, %bb ], [ %i89, %bb74 ]
- %i18 = getelementptr inbounds i32, i32* %i13, i32 5
- %i19 = load i32, i32* %i14, align 4
- %i20 = getelementptr inbounds i32, i32* %i14, i32 1
- %i21 = load i32, i32* %i20, align 4
- %i22 = getelementptr inbounds i32, i32* %i14, i32 2
- %i23 = load i32, i32* %i22, align 4
- %i24 = getelementptr inbounds i32, i32* %i14, i32 3
- %i25 = load i32, i32* %i24, align 4
+ %i18 = getelementptr inbounds i32, ptr %i13, i32 5
+ %i19 = load i32, ptr %i14, align 4
+ %i20 = getelementptr inbounds i32, ptr %i14, i32 1
+ %i21 = load i32, ptr %i20, align 4
+ %i22 = getelementptr inbounds i32, ptr %i14, i32 2
+ %i23 = load i32, ptr %i22, align 4
+ %i24 = getelementptr inbounds i32, ptr %i14, i32 3
+ %i25 = load i32, ptr %i24, align 4
%i26 = call i1 @llvm.test.set.loop.iterations.i32(i32 %arg3)
br i1 %i26, label %bb27, label %bb74
bb27: ; preds = %bb12
- %i28 = getelementptr inbounds i32, i32* %i13, i32 4
- %i29 = load i32, i32* %i28, align 4
- %i30 = getelementptr inbounds i32, i32* %i13, i32 3
- %i31 = load i32, i32* %i30, align 4
- %i32 = getelementptr inbounds i32, i32* %i13, i32 2
- %i33 = load i32, i32* %i32, align 4
- %i34 = getelementptr inbounds i32, i32* %i13, i32 1
- %i35 = load i32, i32* %i34, align 4
- %i36 = load i32, i32* %i13, align 4
+ %i28 = getelementptr inbounds i32, ptr %i13, i32 4
+ %i29 = load i32, ptr %i28, align 4
+ %i30 = getelementptr inbounds i32, ptr %i13, i32 3
+ %i31 = load i32, ptr %i30, align 4
+ %i32 = getelementptr inbounds i32, ptr %i13, i32 2
+ %i33 = load i32, ptr %i32, align 4
+ %i34 = getelementptr inbounds i32, ptr %i13, i32 1
+ %i35 = load i32, ptr %i34, align 4
+ %i36 = load i32, ptr %i13, align 4
br label %bb37
bb37: ; preds = %bb37, %bb27
%lsr.iv = phi i32 [ %lsr.iv.next, %bb37 ], [ %arg3, %bb27 ]
- %i38 = phi i32* [ %i15, %bb27 ], [ %i51, %bb37 ]
- %i39 = phi i32* [ %arg2, %bb27 ], [ %i69, %bb37 ]
+ %i38 = phi ptr [ %i15, %bb27 ], [ %i51, %bb37 ]
+ %i39 = phi ptr [ %arg2, %bb27 ], [ %i69, %bb37 ]
%i40 = phi i32 [ %i25, %bb27 ], [ %i41, %bb37 ]
%i41 = phi i32 [ %i23, %bb27 ], [ %i68, %bb37 ]
%i42 = phi i32 [ %i21, %bb27 ], [ %i43, %bb37 ]
@@ -60,8 +60,8 @@
%i48 = sext i32 %i35 to i64
%i49 = sext i32 %i36 to i64
%i50 = zext i32 %i9 to i64
- %i51 = getelementptr inbounds i32, i32* %i38, i32 1
- %i52 = load i32, i32* %i38, align 4
+ %i51 = getelementptr inbounds i32, ptr %i38, i32 1
+ %i52 = load i32, ptr %i38, align 4
%i53 = sext i32 %i52 to i64
%i54 = mul nsw i64 %i53, %i49
%i55 = sext i32 %i43 to i64
@@ -78,8 +78,8 @@
%i66 = add i64 %i65, %i54
%i67 = ashr i64 %i66, %i50
%i68 = trunc i64 %i67 to i32
- %i69 = getelementptr inbounds i32, i32* %i39, i32 1
- store i32 %i68, i32* %i39, align 4
+ %i69 = getelementptr inbounds i32, ptr %i39, i32 1
+ store i32 %i68, ptr %i39, align 4
%i70 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1)
%i71 = icmp ne i32 %i70, 0
%lsr.iv.next = add i32 %lsr.iv, -1
@@ -94,20 +94,20 @@
%i76 = phi i32 [ %i21, %bb12 ], [ %i43, %bb72 ]
%i77 = phi i32 [ %i23, %bb12 ], [ %i73, %bb72 ]
%i78 = phi i32 [ %i25, %bb12 ], [ %i41, %bb72 ]
- store i32 %i75, i32* %i14, align 4
- %i79 = bitcast i32* %i14 to i8*
- %i80 = getelementptr inbounds i8, i8* %i79, i32 4
- %i81 = bitcast i8* %i80 to i32*
- store i32 %i76, i32* %i81, align 4
- %i82 = bitcast i32* %i14 to i8*
- %i83 = getelementptr inbounds i8, i8* %i82, i32 8
- %i84 = bitcast i8* %i83 to i32*
- store i32 %i77, i32* %i84, align 4
- %i85 = getelementptr inbounds i32, i32* %i14, i32 4
- %i86 = bitcast i32* %i14 to i8*
- %i87 = getelementptr inbounds i8, i8* %i86, i32 12
- %i88 = bitcast i8* %i87 to i32*
- store i32 %i78, i32* %i88, align 4
+ store i32 %i75, ptr %i14, align 4
+ %i79 = bitcast ptr %i14 to ptr
+ %i80 = getelementptr inbounds i8, ptr %i79, i32 4
+ %i81 = bitcast ptr %i80 to ptr
+ store i32 %i76, ptr %i81, align 4
+ %i82 = bitcast ptr %i14 to ptr
+ %i83 = getelementptr inbounds i8, ptr %i82, i32 8
+ %i84 = bitcast ptr %i83 to ptr
+ store i32 %i77, ptr %i84, align 4
+ %i85 = getelementptr inbounds i32, ptr %i14, i32 4
+ %i86 = bitcast ptr %i14 to ptr
+ %i87 = getelementptr inbounds i8, ptr %i86, i32 12
+ %i88 = bitcast ptr %i87 to ptr
+ store i32 %i78, ptr %i88, align 4
%i89 = add i32 %i16, -1
%i90 = icmp eq i32 %i89, 0
br i1 %i90, label %bb91, label %bb12
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll
index 580485874f5fc..79665af17ef58 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve %s -o - | FileCheck %s
-define dso_local i32 @test_500_504(i32* nocapture readonly %x) {
+define dso_local i32 @test_500_504(ptr nocapture readonly %x) {
; CHECK-LABEL: test_500_504:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -38,9 +38,9 @@ vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %entry ], [ %4, %vector.body ]
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 500)
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer
%3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2)
%4 = add i32 %3, %vec.phi
@@ -52,7 +52,7 @@ for.cond.cleanup: ; preds = %vector.body
ret i32 %4
}
-define dso_local i32 @test_501_504(i32* nocapture readonly %x) {
+define dso_local i32 @test_501_504(ptr nocapture readonly %x) {
; CHECK-LABEL: test_501_504:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -75,9 +75,9 @@ vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %entry ], [ %4, %vector.body ]
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 501)
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer
%3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2)
%4 = add i32 %3, %vec.phi
@@ -89,7 +89,7 @@ for.cond.cleanup: ; preds = %vector.body
ret i32 %4
}
-define dso_local i32 @test_502_504(i32* nocapture readonly %x) {
+define dso_local i32 @test_502_504(ptr nocapture readonly %x) {
; CHECK-LABEL: test_502_504:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -112,9 +112,9 @@ vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %entry ], [ %4, %vector.body ]
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 502)
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer
%3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2)
%4 = add i32 %3, %vec.phi
@@ -126,7 +126,7 @@ for.cond.cleanup: ; preds = %vector.body
ret i32 %4
}
-define dso_local i32 @test_503_504(i32* nocapture readonly %x) {
+define dso_local i32 @test_503_504(ptr nocapture readonly %x) {
; CHECK-LABEL: test_503_504:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -149,9 +149,9 @@ vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %entry ], [ %4, %vector.body ]
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 503)
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer
%3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2)
%4 = add i32 %3, %vec.phi
@@ -163,7 +163,7 @@ for.cond.cleanup: ; preds = %vector.body
ret i32 %4
}
-define dso_local i32 @test_504_504(i32* nocapture readonly %x) {
+define dso_local i32 @test_504_504(ptr nocapture readonly %x) {
; CHECK-LABEL: test_504_504:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -186,9 +186,9 @@ vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %entry ], [ %4, %vector.body ]
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 504)
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer
%3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2)
%4 = add i32 %3, %vec.phi
@@ -201,5 +201,5 @@ for.cond.cleanup: ; preds = %vector.body
}
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir
index b20b3d141cce3..6307db7f55034 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir
@@ -3,7 +3,7 @@
--- |
- define i32 @test(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) {
+ define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
entry:
%cmp10 = icmp sgt i32 %n, 0
%0 = add i32 %n, 7
@@ -19,25 +19,25 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv3 = phi i16* [ %scevgep4, %vector.body ], [ %x, %vector.ph ]
- %lsr.iv1 = phi i16* [ %scevgep, %vector.body ], [ %y, %vector.ph ]
+ %lsr.iv3 = phi ptr [ %scevgep4, %vector.body ], [ %x, %vector.ph ]
+ %lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %y, %vector.ph ]
%vec.phi = phi i32 [ 0, %vector.ph ], [ %16, %vector.body ]
%7 = phi i32 [ %6, %vector.ph ], [ %17, %vector.body ]
%8 = phi i32 [ %n, %vector.ph ], [ %10, %vector.body ]
- %lsr.iv12 = bitcast i16* %lsr.iv1 to <8 x i16>*
- %lsr.iv35 = bitcast i16* %lsr.iv3 to <8 x i16>*
+ %lsr.iv12 = bitcast ptr %lsr.iv1 to ptr
+ %lsr.iv35 = bitcast ptr %lsr.iv3 to ptr
%9 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %8)
%10 = sub i32 %8, 8
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv35, i32 2, <8 x i1> %9, <8 x i16> undef)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv35, i32 2, <8 x i1> %9, <8 x i16> undef)
%11 = sext <8 x i16> %wide.masked.load to <8 x i32>
- %wide.masked.load13 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv12, i32 2, <8 x i1> %9, <8 x i16> undef)
+ %wide.masked.load13 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv12, i32 2, <8 x i1> %9, <8 x i16> undef)
%12 = sext <8 x i16> %wide.masked.load13 to <8 x i32>
%13 = mul nsw <8 x i32> %12, %11
%14 = select <8 x i1> %9, <8 x i32> %13, <8 x i32> zeroinitializer
%15 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %14)
%16 = add i32 %15, %vec.phi
- %scevgep = getelementptr i16, i16* %lsr.iv1, i32 8
- %scevgep4 = getelementptr i16, i16* %lsr.iv3, i32 8
+ %scevgep = getelementptr i16, ptr %lsr.iv1, i32 8
+ %scevgep4 = getelementptr i16, ptr %lsr.iv3, i32 8
%17 = call i32 @llvm.loop.decrement.reg.i32(i32 %7, i32 1)
%18 = icmp ne i32 %17, 0
br i1 %18, label %vector.body, label %for.cond.cleanup
@@ -48,7 +48,7 @@
}
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
- declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
+ declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir
index d71a8299db1a8..d5781155fe0e7 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir
@@ -3,7 +3,7 @@
#
--- |
@mask = external global i16
- define dso_local void @test(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i32* noalias nocapture readonly %arg3) local_unnamed_addr #0 {
+ define dso_local void @test(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, ptr noalias nocapture readonly %arg3) local_unnamed_addr #0 {
bb:
%tmp = icmp eq i32 %arg2, 0
%tmp1 = add i32 %arg2, 3
@@ -12,50 +12,50 @@
%tmp4 = add i32 %tmp3, -4
%tmp5 = lshr i32 %tmp4, 2
%tmp6 = add nuw nsw i32 %tmp5, 1
- %mask.gep9 = bitcast i16* @mask to i16*
- %mask.load = load i16, i16* %mask.gep9
+ %mask.gep9 = bitcast ptr @mask to ptr
+ %mask.load = load i16, ptr %mask.gep9
%conv.mask = zext i16 %mask.load to i32
%invariant.mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %conv.mask)
br i1 %tmp, label %bb27, label %bb3
bb3: ; preds = %bb
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp6)
- %scevgep1 = getelementptr i32, i32* %arg3, i32 -4
+ %scevgep1 = getelementptr i32, ptr %arg3, i32 -4
br label %bb9
bb9: ; preds = %bb9, %bb3
- %lsr.iv4 = phi i32* [ %scevgep6, %bb9 ], [ %scevgep1, %bb3 ]
- %lsr.iv2 = phi i32* [ %scevgep3, %bb9 ], [ %arg1, %bb3 ]
- %lsr.iv = phi i32* [ %scevgep, %bb9 ], [ %arg, %bb3 ]
+ %lsr.iv4 = phi ptr [ %scevgep6, %bb9 ], [ %scevgep1, %bb3 ]
+ %lsr.iv2 = phi ptr [ %scevgep3, %bb9 ], [ %arg1, %bb3 ]
+ %lsr.iv = phi ptr [ %scevgep, %bb9 ], [ %arg, %bb3 ]
%tmp7 = phi i32 [ %start, %bb3 ], [ %tmp12, %bb9 ]
%tmp8 = phi i32 [ %arg2, %bb3 ], [ %tmp11, %bb9 ]
- %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
- %lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>*
- %lsr.iv47 = bitcast i32* %lsr.iv4 to <4 x i32>*
+ %lsr.iv1 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv24 = bitcast ptr %lsr.iv2 to ptr
+ %lsr.iv47 = bitcast ptr %lsr.iv4 to ptr
%vctp = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp8)
%and = and <4 x i1> %vctp, %invariant.mask
%tmp11 = sub i32 %tmp8, 4
- %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef)
- %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1, i32 4, <4 x i1> %and, <4 x i32> undef)
+ %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef)
+ %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1, i32 4, <4 x i1> %and, <4 x i32> undef)
%tmp23 = mul nsw <4 x i32> %tmp22, %tmp17
- %scevgep2 = getelementptr <4 x i32>, <4 x i32>* %lsr.iv47, i32 1
- %load.limits = load <4 x i32>, <4 x i32>* %scevgep2
+ %scevgep2 = getelementptr <4 x i32>, ptr %lsr.iv47, i32 1
+ %load.limits = load <4 x i32>, ptr %scevgep2
%0 = insertelement <4 x i32> undef, i32 %conv.mask, i32 0
%1 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
%bad.icmp = icmp ule <4 x i32> %load.limits, %1
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp23, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %bad.icmp)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp23, ptr %lsr.iv1, i32 4, <4 x i1> %bad.icmp)
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp7, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 4
- %scevgep6 = getelementptr i32, i32* %lsr.iv4, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep3 = getelementptr i32, ptr %lsr.iv2, i32 4
+ %scevgep6 = getelementptr i32, ptr %lsr.iv4, i32 4
br i1 %tmp13, label %bb9, label %bb27
bb27: ; preds = %bb9, %bb
ret void
}
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir
index 56bb50a43ab29..d54023503e299 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir
@@ -7,7 +7,7 @@
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-unknown-eabi"
- define dso_local void @use_before_def(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+ define dso_local void @use_before_def(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
entry:
%cmp8 = icmp sgt i32 %N, 0
%0 = add i32 %N, 3
@@ -23,23 +23,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
- %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
- %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
+ %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
+ %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
- %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
- %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>*
+ %lsr.iv13 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr
+ %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr
%8 = call <4 x i1> @llvm.arm.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
- %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
%10 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %8)
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4
- %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv1719, i32 4, <4 x i1> %8)
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4
+ %scevgep18 = getelementptr i32, ptr %lsr.iv17, i32 4
%11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %for.cond.cleanup
@@ -50,9 +50,9 @@
declare i32 @llvm.start.loop.iterations.i32(i32)
declare <4 x i1> @llvm.arm.vctp32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
- declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
...
---
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
index f7db9a60a0e20..4998b5bafc133 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
@@ -7,17 +7,17 @@
# CHECK: LETP
--- |
- %struct.DCT_InstanceTypeDef = type { float*, i32, i32 }
+ %struct.DCT_InstanceTypeDef = type { ptr, i32, i32 }
; Function Attrs: nofree nounwind
- define hidden arm_aapcs_vfpcc void @test(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float* %pIn, float* nocapture %pOut) {
+ define hidden arm_aapcs_vfpcc void @test(ptr nocapture readonly %S, ptr %pIn, ptr nocapture %pOut) {
entry:
- %NumInputs = getelementptr inbounds %struct.DCT_InstanceTypeDef, %struct.DCT_InstanceTypeDef* %S, i32 0, i32 2
- %0 = load i32, i32* %NumInputs, align 4
- %NumFilters = getelementptr inbounds %struct.DCT_InstanceTypeDef, %struct.DCT_InstanceTypeDef* %S, i32 0, i32 1
- %1 = load i32, i32* %NumFilters, align 4
- %pDCTCoefs34 = bitcast %struct.DCT_InstanceTypeDef* %S to float**
- %2 = load float*, float** %pDCTCoefs34, align 4
+ %NumInputs = getelementptr inbounds %struct.DCT_InstanceTypeDef, ptr %S, i32 0, i32 2
+ %0 = load i32, ptr %NumInputs, align 4
+ %NumFilters = getelementptr inbounds %struct.DCT_InstanceTypeDef, ptr %S, i32 0, i32 1
+ %1 = load i32, ptr %NumFilters, align 4
+ %pDCTCoefs34 = bitcast ptr %S to ptr
+ %2 = load ptr, ptr %pDCTCoefs34, align 4
%3 = add i32 %0, 3
%4 = icmp slt i32 %0, 4
%smin36 = select i1 %4, i32 %0, i32 4
@@ -29,14 +29,14 @@
do.body: ; preds = %do.body, %entry
%count.0 = phi i32 [ %0, %entry ], [ %12, %do.body ]
- %pInT.0 = phi float* [ %pIn, %entry ], [ %add.ptr, %do.body ]
+ %pInT.0 = phi ptr [ %pIn, %entry ], [ %add.ptr, %do.body ]
%sumVec.0 = phi <4 x float> [ zeroinitializer, %entry ], [ %11, %do.body ]
%8 = phi i32 [ %start1, %entry ], [ %13, %do.body ]
- %pInT.033 = bitcast float* %pInT.0 to <4 x float>*
+ %pInT.033 = bitcast ptr %pInT.0 to ptr
%9 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %count.0)
- %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pInT.033, i32 4, <4 x i1> %9, <4 x float> zeroinitializer)
+ %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %pInT.033, i32 4, <4 x i1> %9, <4 x float> zeroinitializer)
%11 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0, <4 x float> %10, <4 x i1> %9, <4 x float> undef)
- %add.ptr = getelementptr inbounds float, float* %pInT.0, i32 4
+ %add.ptr = getelementptr inbounds float, ptr %pInT.0, i32 4
%12 = add i32 %count.0, -4
%13 = call i32 @llvm.loop.decrement.reg.i32(i32 %8, i32 1)
%14 = icmp ne i32 %13, 0
@@ -50,27 +50,27 @@
%add1 = fadd fast float %add, %17
%18 = extractelement <4 x float> %11, i32 3
%add2 = fadd fast float %add1, %18
- %19 = load float, float* %2, align 4
+ %19 = load float, ptr %2, align 4
%mul = fmul fast float %19, %add2
- store float %mul, float* %pOut, align 4
+ store float %mul, ptr %pOut, align 4
%sub4 = add i32 %1, -4
%cmp5201 = icmp ugt i32 %sub4, 1
br i1 %cmp5201, label %for.body.lr.ph, label %for.cond54.preheader
for.body.lr.ph: ; preds = %do.end
- %scevgep = getelementptr float, float* %pIn, i32 4
+ %scevgep = getelementptr float, ptr %pIn, i32 4
%20 = add i32 %0, 4
- %scevgep5 = getelementptr float, float* %2, i32 %20
+ %scevgep5 = getelementptr float, ptr %2, i32 %20
%21 = shl i32 %0, 4
%22 = shl i32 %0, 1
%23 = add i32 %22, 4
- %scevgep12 = getelementptr float, float* %2, i32 %23
+ %scevgep12 = getelementptr float, ptr %2, i32 %23
%24 = mul i32 %0, 3
%25 = add i32 %24, 4
- %scevgep19 = getelementptr float, float* %2, i32 %25
+ %scevgep19 = getelementptr float, ptr %2, i32 %25
%26 = shl i32 %0, 2
%27 = add i32 %26, 4
- %scevgep26 = getelementptr float, float* %2, i32 %27
+ %scevgep26 = getelementptr float, ptr %2, i32 %27
%28 = add i32 %0, -1
%29 = add i32 %0, -4
%30 = icmp slt i32 %29, 4
@@ -95,78 +95,78 @@
br label %for.body56
for.body: ; preds = %do.end33, %for.body.lr.ph
- %lsr.iv27 = phi float* [ %88, %do.end33 ], [ %scevgep26, %for.body.lr.ph ]
- %lsr.iv20 = phi float* [ %87, %do.end33 ], [ %scevgep19, %for.body.lr.ph ]
- %lsr.iv13 = phi float* [ %86, %do.end33 ], [ %scevgep12, %for.body.lr.ph ]
- %lsr.iv6 = phi float* [ %85, %do.end33 ], [ %scevgep5, %for.body.lr.ph ]
+ %lsr.iv27 = phi ptr [ %88, %do.end33 ], [ %scevgep26, %for.body.lr.ph ]
+ %lsr.iv20 = phi ptr [ %87, %do.end33 ], [ %scevgep19, %for.body.lr.ph ]
+ %lsr.iv13 = phi ptr [ %86, %do.end33 ], [ %scevgep12, %for.body.lr.ph ]
+ %lsr.iv6 = phi ptr [ %85, %do.end33 ], [ %scevgep5, %for.body.lr.ph ]
%k.0202 = phi i32 [ 1, %for.body.lr.ph ], [ %add53, %do.end33 ]
- %39 = bitcast float* %pIn to <4 x float>*
+ %39 = bitcast ptr %pIn to ptr
%mul7 = mul i32 %k.0202, %0
- %arrayidx8 = getelementptr inbounds float, float* %2, i32 %mul7
+ %arrayidx8 = getelementptr inbounds float, ptr %2, i32 %mul7
%add9 = add nuw nsw i32 %k.0202, 1
%mul10 = mul i32 %add9, %0
- %arrayidx11 = getelementptr inbounds float, float* %2, i32 %mul10
+ %arrayidx11 = getelementptr inbounds float, ptr %2, i32 %mul10
%add12 = add nuw nsw i32 %k.0202, 2
%mul13 = mul i32 %add12, %0
- %arrayidx14 = getelementptr inbounds float, float* %2, i32 %mul13
+ %arrayidx14 = getelementptr inbounds float, ptr %2, i32 %mul13
%add15 = add i32 %k.0202, 3
%mul16 = mul i32 %add15, %0
- %arrayidx17 = getelementptr inbounds float, float* %2, i32 %mul16
- %40 = load <4 x float>, <4 x float>* %39, align 4
- %41 = bitcast float* %arrayidx8 to <4 x float>*
- %42 = load <4 x float>, <4 x float>* %41, align 4
+ %arrayidx17 = getelementptr inbounds float, ptr %2, i32 %mul16
+ %40 = load <4 x float>, ptr %39, align 4
+ %41 = bitcast ptr %arrayidx8 to ptr
+ %42 = load <4 x float>, ptr %41, align 4
%43 = fmul fast <4 x float> %42, %40
- %44 = bitcast float* %arrayidx11 to <4 x float>*
- %45 = load <4 x float>, <4 x float>* %44, align 4
+ %44 = bitcast ptr %arrayidx11 to ptr
+ %45 = load <4 x float>, ptr %44, align 4
%46 = fmul fast <4 x float> %45, %40
- %47 = bitcast float* %arrayidx14 to <4 x float>*
- %48 = load <4 x float>, <4 x float>* %47, align 4
+ %47 = bitcast ptr %arrayidx14 to ptr
+ %48 = load <4 x float>, ptr %47, align 4
%49 = fmul fast <4 x float> %48, %40
- %50 = bitcast float* %arrayidx17 to <4 x float>*
- %51 = load <4 x float>, <4 x float>* %50, align 4
+ %50 = bitcast ptr %arrayidx17 to ptr
+ %51 = load <4 x float>, ptr %50, align 4
%52 = fmul fast <4 x float> %51, %40
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %33)
br label %do.body24
do.body24: ; preds = %do.body24, %for.body
- %lsr.iv30 = phi float* [ %scevgep31, %do.body24 ], [ %lsr.iv27, %for.body ]
- %lsr.iv23 = phi float* [ %scevgep24, %do.body24 ], [ %lsr.iv20, %for.body ]
- %lsr.iv16 = phi float* [ %scevgep17, %do.body24 ], [ %lsr.iv13, %for.body ]
- %lsr.iv9 = phi float* [ %scevgep10, %do.body24 ], [ %lsr.iv6, %for.body ]
- %lsr.iv = phi float* [ %scevgep3, %do.body24 ], [ %scevgep, %for.body ]
+ %lsr.iv30 = phi ptr [ %scevgep31, %do.body24 ], [ %lsr.iv27, %for.body ]
+ %lsr.iv23 = phi ptr [ %scevgep24, %do.body24 ], [ %lsr.iv20, %for.body ]
+ %lsr.iv16 = phi ptr [ %scevgep17, %do.body24 ], [ %lsr.iv13, %for.body ]
+ %lsr.iv9 = phi ptr [ %scevgep10, %do.body24 ], [ %lsr.iv6, %for.body ]
+ %lsr.iv = phi ptr [ %scevgep3, %do.body24 ], [ %scevgep, %for.body ]
%sumVec0.0 = phi <4 x float> [ %43, %for.body ], [ %56, %do.body24 ]
%sumVec1.0 = phi <4 x float> [ %46, %for.body ], [ %58, %do.body24 ]
%sumVec2.0 = phi <4 x float> [ %49, %for.body ], [ %60, %do.body24 ]
%sumVec3.0 = phi <4 x float> [ %52, %for.body ], [ %62, %do.body24 ]
%53 = phi i32 [ %start2, %for.body ], [ %63, %do.body24 ]
- %lsr.iv4 = bitcast float* %lsr.iv to <4 x float>*
- %lsr.iv911 = bitcast float* %lsr.iv9 to <4 x float>*
- %lsr.iv1618 = bitcast float* %lsr.iv16 to <4 x float>*
- %lsr.iv2325 = bitcast float* %lsr.iv23 to <4 x float>*
- %lsr.iv3032 = bitcast float* %lsr.iv30 to <4 x float>*
- %54 = load <4 x float>, <4 x float>* %lsr.iv4, align 4
- %55 = load <4 x float>, <4 x float>* %lsr.iv911, align 4
+ %lsr.iv4 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv911 = bitcast ptr %lsr.iv9 to ptr
+ %lsr.iv1618 = bitcast ptr %lsr.iv16 to ptr
+ %lsr.iv2325 = bitcast ptr %lsr.iv23 to ptr
+ %lsr.iv3032 = bitcast ptr %lsr.iv30 to ptr
+ %54 = load <4 x float>, ptr %lsr.iv4, align 4
+ %55 = load <4 x float>, ptr %lsr.iv911, align 4
%56 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %55, <4 x float> %sumVec0.0)
- %57 = load <4 x float>, <4 x float>* %lsr.iv1618, align 4
+ %57 = load <4 x float>, ptr %lsr.iv1618, align 4
%58 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %57, <4 x float> %sumVec1.0)
- %59 = load <4 x float>, <4 x float>* %lsr.iv2325, align 4
+ %59 = load <4 x float>, ptr %lsr.iv2325, align 4
%60 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %59, <4 x float> %sumVec2.0)
- %61 = load <4 x float>, <4 x float>* %lsr.iv3032, align 4
+ %61 = load <4 x float>, ptr %lsr.iv3032, align 4
%62 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %61, <4 x float> %sumVec3.0)
- %scevgep3 = getelementptr float, float* %lsr.iv, i32 4
- %scevgep10 = getelementptr float, float* %lsr.iv9, i32 4
- %scevgep17 = getelementptr float, float* %lsr.iv16, i32 4
- %scevgep24 = getelementptr float, float* %lsr.iv23, i32 4
- %scevgep31 = getelementptr float, float* %lsr.iv30, i32 4
+ %scevgep3 = getelementptr float, ptr %lsr.iv, i32 4
+ %scevgep10 = getelementptr float, ptr %lsr.iv9, i32 4
+ %scevgep17 = getelementptr float, ptr %lsr.iv16, i32 4
+ %scevgep24 = getelementptr float, ptr %lsr.iv23, i32 4
+ %scevgep31 = getelementptr float, ptr %lsr.iv30, i32 4
%63 = call i32 @llvm.loop.decrement.reg.i32(i32 %53, i32 1)
%64 = icmp ne i32 %63, 0
br i1 %64, label %do.body24, label %do.end33
do.end33: ; preds = %do.body24
- %65 = bitcast float* %lsr.iv27 to i1*
- %66 = bitcast float* %lsr.iv20 to i1*
- %67 = bitcast float* %lsr.iv13 to i1*
- %68 = bitcast float* %lsr.iv6 to i1*
+ %65 = bitcast ptr %lsr.iv27 to ptr
+ %66 = bitcast ptr %lsr.iv20 to ptr
+ %67 = bitcast ptr %lsr.iv13 to ptr
+ %68 = bitcast ptr %lsr.iv6 to ptr
%69 = extractelement <4 x float> %56, i32 0
%70 = extractelement <4 x float> %56, i32 1
%add34 = fadd fast float %69, %70
@@ -174,8 +174,8 @@
%add35 = fadd fast float %add34, %71
%72 = extractelement <4 x float> %56, i32 3
%add36 = fadd fast float %add35, %72
- %arrayidx37 = getelementptr inbounds float, float* %pOut, i32 %k.0202
- store float %add36, float* %arrayidx37, align 4
+ %arrayidx37 = getelementptr inbounds float, ptr %pOut, i32 %k.0202
+ store float %add36, ptr %arrayidx37, align 4
%73 = extractelement <4 x float> %58, i32 0
%74 = extractelement <4 x float> %58, i32 1
%add38 = fadd fast float %73, %74
@@ -183,8 +183,8 @@
%add39 = fadd fast float %add38, %75
%76 = extractelement <4 x float> %58, i32 3
%add40 = fadd fast float %add39, %76
- %arrayidx42 = getelementptr inbounds float, float* %pOut, i32 %add9
- store float %add40, float* %arrayidx42, align 4
+ %arrayidx42 = getelementptr inbounds float, ptr %pOut, i32 %add9
+ store float %add40, ptr %arrayidx42, align 4
%77 = extractelement <4 x float> %60, i32 0
%78 = extractelement <4 x float> %60, i32 1
%add43 = fadd fast float %77, %78
@@ -192,8 +192,8 @@
%add44 = fadd fast float %add43, %79
%80 = extractelement <4 x float> %60, i32 3
%add45 = fadd fast float %add44, %80
- %arrayidx47 = getelementptr inbounds float, float* %pOut, i32 %add12
- store float %add45, float* %arrayidx47, align 4
+ %arrayidx47 = getelementptr inbounds float, ptr %pOut, i32 %add12
+ store float %add45, ptr %arrayidx47, align 4
%81 = extractelement <4 x float> %62, i32 0
%82 = extractelement <4 x float> %62, i32 1
%add48 = fadd fast float %81, %82
@@ -201,41 +201,41 @@
%add49 = fadd fast float %add48, %83
%84 = extractelement <4 x float> %62, i32 3
%add50 = fadd fast float %add49, %84
- %arrayidx52 = getelementptr inbounds float, float* %pOut, i32 %add15
- store float %add50, float* %arrayidx52, align 4
+ %arrayidx52 = getelementptr inbounds float, ptr %pOut, i32 %add15
+ store float %add50, ptr %arrayidx52, align 4
%add53 = add i32 %k.0202, 4
- %scevgep8 = getelementptr i1, i1* %68, i32 %21
- %85 = bitcast i1* %scevgep8 to float*
- %scevgep15 = getelementptr i1, i1* %67, i32 %21
- %86 = bitcast i1* %scevgep15 to float*
- %scevgep22 = getelementptr i1, i1* %66, i32 %21
- %87 = bitcast i1* %scevgep22 to float*
- %scevgep29 = getelementptr i1, i1* %65, i32 %21
- %88 = bitcast i1* %scevgep29 to float*
+ %scevgep8 = getelementptr i1, ptr %68, i32 %21
+ %85 = bitcast ptr %scevgep8 to ptr
+ %scevgep15 = getelementptr i1, ptr %67, i32 %21
+ %86 = bitcast ptr %scevgep15 to ptr
+ %scevgep22 = getelementptr i1, ptr %66, i32 %21
+ %87 = bitcast ptr %scevgep22 to ptr
+ %scevgep29 = getelementptr i1, ptr %65, i32 %21
+ %88 = bitcast ptr %scevgep29 to ptr
%cmp5 = icmp ult i32 %add53, %sub4
br i1 %cmp5, label %for.body, label %for.cond54.preheader
for.body56: ; preds = %for.body56.preheader, %do.end66
%k.1200 = phi i32 [ %inc, %do.end66 ], [ %k.0.lcssa, %for.body56.preheader ]
%mul57 = mul i32 %k.1200, %0
- %arrayidx58 = getelementptr inbounds float, float* %2, i32 %mul57
+ %arrayidx58 = getelementptr inbounds float, ptr %2, i32 %mul57
%start3 = call i32 @llvm.start.loop.iterations.i32(i32 %38)
br label %do.body59
do.body59: ; preds = %do.body59, %for.body56
%count.2 = phi i32 [ %0, %for.body56 ], [ %94, %do.body59 ]
- %pInT.2 = phi float* [ %pIn, %for.body56 ], [ %add.ptr61, %do.body59 ]
- %pCos0.1 = phi float* [ %arrayidx58, %for.body56 ], [ %add.ptr62, %do.body59 ]
+ %pInT.2 = phi ptr [ %pIn, %for.body56 ], [ %add.ptr61, %do.body59 ]
+ %pCos0.1 = phi ptr [ %arrayidx58, %for.body56 ], [ %add.ptr62, %do.body59 ]
%sumVec.1 = phi <4 x float> [ zeroinitializer, %for.body56 ], [ %93, %do.body59 ]
%89 = phi i32 [ %start3, %for.body56 ], [ %95, %do.body59 ]
- %pInT.21 = bitcast float* %pInT.2 to <4 x float>*
- %pCos0.12 = bitcast float* %pCos0.1 to <4 x float>*
+ %pInT.21 = bitcast ptr %pInT.2 to ptr
+ %pCos0.12 = bitcast ptr %pCos0.1 to ptr
%90 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %count.2)
- %91 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pInT.21, i32 4, <4 x i1> %90, <4 x float> zeroinitializer)
- %92 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pCos0.12, i32 4, <4 x i1> %90, <4 x float> zeroinitializer)
+ %91 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %pInT.21, i32 4, <4 x i1> %90, <4 x float> zeroinitializer)
+ %92 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %pCos0.12, i32 4, <4 x i1> %90, <4 x float> zeroinitializer)
%93 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %91, <4 x float> %92, <4 x float> %sumVec.1, <4 x i1> %90)
- %add.ptr61 = getelementptr inbounds float, float* %pInT.2, i32 4
- %add.ptr62 = getelementptr inbounds float, float* %pCos0.1, i32 4
+ %add.ptr61 = getelementptr inbounds float, ptr %pInT.2, i32 4
+ %add.ptr62 = getelementptr inbounds float, ptr %pCos0.1, i32 4
%94 = add i32 %count.2, -4
%95 = call i32 @llvm.loop.decrement.reg.i32(i32 %89, i32 1)
%96 = icmp ne i32 %95, 0
@@ -249,8 +249,8 @@
%add68 = fadd fast float %add67, %99
%100 = extractelement <4 x float> %93, i32 3
%add69 = fadd fast float %add68, %100
- %arrayidx70 = getelementptr inbounds float, float* %pOut, i32 %k.1200
- store float %add69, float* %arrayidx70, align 4
+ %arrayidx70 = getelementptr inbounds float, ptr %pOut, i32 %k.1200
+ store float %add69, ptr %arrayidx70, align 4
%inc = add nuw i32 %k.1200, 1
%exitcond.not = icmp eq i32 %inc, %1
br i1 %exitcond.not, label %for.end72, label %for.body56
@@ -260,7 +260,7 @@
}
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
- declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2
+ declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #2
declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #3
declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>) #1
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir
index 6955007957f4e..fe156fe9dab71 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir
@@ -5,7 +5,7 @@
# predication.
--- |
- define dso_local i32 @no_vpsel_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+ define dso_local i32 @no_vpsel_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -22,22 +22,22 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
%tmp12 = mul nsw <4 x i32> %tmp11, %tmp10
%tmp13 = add <4 x i32> %tmp12, %vec.phi
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4
%tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp15 = icmp ne i32 %tmp14, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -51,7 +51,7 @@
%res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp16, %middle.block ]
ret i32 %res.0.lcssa
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir
index 4d3f0ac5f92d2..e529a691545f7 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir
@@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
--- |
- define dso_local void @incorrect_sub_16(i16* noalias nocapture %A, i16* noalias nocapture readonly %B, i16* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+ define dso_local void @incorrect_sub_16(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
entry:
%cmp8 = icmp sgt i32 %N, 0
%0 = add i32 %N, 3
@@ -17,23 +17,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv17 = phi i16* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
- %lsr.iv14 = phi i16* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
+ %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
+ %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv13 = bitcast i16* %lsr.iv to <8 x i16>*
- %lsr.iv1416 = bitcast i16* %lsr.iv14 to <8 x i16>*
- %lsr.iv1719 = bitcast i16* %lsr.iv17 to <8 x i16>*
+ %lsr.iv13 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr
+ %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr
%8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %7)
%9 = sub i32 %7, 7
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv13, i32 4, <8 x i1> %8, <8 x i16> undef)
- %wide.masked.load12 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv1416, i32 4, <8 x i1> %8, <8 x i16> undef)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv13, i32 4, <8 x i1> %8, <8 x i16> undef)
+ %wide.masked.load12 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv1416, i32 4, <8 x i1> %8, <8 x i16> undef)
%10 = add nsw <8 x i16> %wide.masked.load12, %wide.masked.load
- call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %10, <8 x i16>* %lsr.iv1719, i32 4, <8 x i1> %8)
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
- %scevgep15 = getelementptr i16, i16* %lsr.iv14, i32 8
- %scevgep18 = getelementptr i16, i16* %lsr.iv17, i32 8
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> %10, ptr %lsr.iv1719, i32 4, <8 x i1> %8)
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 8
+ %scevgep15 = getelementptr i16, ptr %lsr.iv14, i32 8
+ %scevgep18 = getelementptr i16, ptr %lsr.iv17, i32 8
%11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %for.cond.cleanup
@@ -44,8 +44,8 @@
declare i32 @llvm.start.loop.iterations.i32(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
- declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
- declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
+ declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>)
+ declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>)
...
---
name: incorrect_sub_16
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir
index 7ea07bd639560..51844a7cf6929 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir
@@ -8,7 +8,7 @@
# We should optimise away the SUB
--- |
- define dso_local void @incorrect_sub_32(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+ define dso_local void @incorrect_sub_32(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
entry:
%cmp8 = icmp sgt i32 %N, 0
%0 = add i32 %N, 3
@@ -24,23 +24,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
- %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
- %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
+ %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
+ %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
- %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
- %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>*
+ %lsr.iv13 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr
+ %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 5
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
- %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
%10 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %8)
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4
- %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv1719, i32 4, <4 x i1> %8)
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4
+ %scevgep18 = getelementptr i32, ptr %lsr.iv17, i32 4
%11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %for.cond.cleanup
@@ -51,8 +51,8 @@
declare i32 @llvm.start.loop.iterations.i32(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
...
---
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir
index eb578318c3ac8..305c31b33b21c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir
@@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
--- |
- define dso_local void @incorrect_sub_8(i8* noalias nocapture %A, i8* noalias nocapture readonly %B, i8* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+ define dso_local void @incorrect_sub_8(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
entry:
%cmp8 = icmp sgt i32 %N, 0
%0 = add i32 %N, 3
@@ -17,23 +17,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv17 = phi i8* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
- %lsr.iv14 = phi i8* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
+ %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
+ %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv13 = bitcast i8* %lsr.iv to <16 x i8>*
- %lsr.iv1416 = bitcast i8* %lsr.iv14 to <16 x i8>*
- %lsr.iv1719 = bitcast i8* %lsr.iv17 to <16 x i8>*
+ %lsr.iv13 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr
+ %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr
%8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %7)
%9 = sub i32 %7, 15
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv13, i32 4, <16 x i1> %8, <16 x i8> undef)
- %wide.masked.load12 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv1416, i32 4, <16 x i1> %8, <16 x i8> undef)
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv13, i32 4, <16 x i1> %8, <16 x i8> undef)
+ %wide.masked.load12 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv1416, i32 4, <16 x i1> %8, <16 x i8> undef)
%10 = add nsw <16 x i8> %wide.masked.load12, %wide.masked.load
- call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %10, <16 x i8>* %lsr.iv1719, i32 4, <16 x i1> %8)
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
- %scevgep15 = getelementptr i8, i8* %lsr.iv14, i32 16
- %scevgep18 = getelementptr i8, i8* %lsr.iv17, i32 16
+ call void @llvm.masked.store.v16i8.p0(<16 x i8> %10, ptr %lsr.iv1719, i32 4, <16 x i1> %8)
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 16
+ %scevgep15 = getelementptr i8, ptr %lsr.iv14, i32 16
+ %scevgep18 = getelementptr i8, ptr %lsr.iv17, i32 16
%11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %for.cond.cleanup
@@ -44,9 +44,9 @@
declare i32 @llvm.start.loop.iterations.i32(i32)
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
- declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
- declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
- declare void @llvm.stackprotector(i8*, i8**)
+ declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>)
+ declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>)
+ declare void @llvm.stackprotector(ptr, ptr)
...
---
name: incorrect_sub_8
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir
index d9b8ca242ee63..6ef6ba1046dda 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir
@@ -4,7 +4,7 @@
# Test that VPNOTs cannot be within a tail predicated loop.
--- |
- define dso_local void @inloop_vpnot(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %c, i16* nocapture readonly %d, i32* nocapture %e, i32 %N) local_unnamed_addr #0 {
+ define dso_local void @inloop_vpnot(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %c, ptr nocapture readonly %d, ptr nocapture %e, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -21,39 +21,39 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv.e = phi i32* [ %scevgep.e, %vector.body ], [ %e, %vector.ph ]
- %lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
- %lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv.e = phi ptr [ %scevgep.e, %vector.body ], [ %e, %vector.ph ]
+ %lsr.iv.d = phi ptr [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
+ %lsr.iv.c = phi ptr [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp14, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>*
- %lsr.iv1820.c = bitcast i16* %lsr.iv.c to <4 x i16>*
- %lsr.iv17.d = bitcast i16* %lsr.iv.d to <4 x i16>*
- %lsr.cast.e = bitcast i32* %lsr.iv.e to <4 x i32>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr
+ %lsr.iv1820.c = bitcast ptr %lsr.iv.c to ptr
+ %lsr.iv17.d = bitcast ptr %lsr.iv.d to ptr
+ %lsr.cast.e = bitcast ptr %lsr.iv.e to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
- %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%sext.load.c = sext <4 x i16> %wide.masked.load.c to <4 x i32>
- %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%sext.load.d = sext <4 x i16> %wide.masked.load.d to <4 x i32>
%tmp12 = mul nsw <4 x i32> %tmp11, %tmp10
%mul.2 = mul nsw <4 x i32> %sext.load.c, %sext.load.d
%tmp13 = add <4 x i32> %tmp12, %mul.2
%tmp14 = add <4 x i32> %tmp13, %vec.phi
%vpnot = xor <4 x i1> %tmp8, <i1 true, i1 true, i1 true, i1 true>
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp14, <4 x i32>* %lsr.cast.e, i32 4, <4 x i1> %vpnot)
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4
- %scevgep.c = getelementptr i16, i16* %lsr.iv.c, i32 4
- %scevgep.d = getelementptr i16, i16* %lsr.iv.d, i32 4
- %scevgep.e = getelementptr i32, i32* %lsr.iv.e, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp14, ptr %lsr.cast.e, i32 4, <4 x i1> %vpnot)
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4
+ %scevgep.c = getelementptr i16, ptr %lsr.iv.c, i32 4
+ %scevgep.d = getelementptr i16, ptr %lsr.iv.d, i32 4
+ %scevgep.e = getelementptr i32, ptr %lsr.iv.e, i32 4
%tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp16 = icmp ne i32 %tmp15, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -62,8 +62,8 @@
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir
index 92d59989d955d..6681e0a820719 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir
@@ -4,7 +4,7 @@
# Test that a predicated VPNOT cannot be in a tail predicated loop.
--- |
- define dso_local void @inloop_vpnot(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %c, i16* nocapture readonly %d, i32* nocapture %e, i32 %N) local_unnamed_addr #0 {
+ define dso_local void @inloop_vpnot(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %c, ptr nocapture readonly %d, ptr nocapture %e, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -21,39 +21,39 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv.e = phi i32* [ %scevgep.e, %vector.body ], [ %e, %vector.ph ]
- %lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
- %lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv.e = phi ptr [ %scevgep.e, %vector.body ], [ %e, %vector.ph ]
+ %lsr.iv.d = phi ptr [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
+ %lsr.iv.c = phi ptr [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp14, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>*
- %lsr.iv1820.c = bitcast i16* %lsr.iv.c to <4 x i16>*
- %lsr.iv17.d = bitcast i16* %lsr.iv.d to <4 x i16>*
- %lsr.cast.e = bitcast i32* %lsr.iv.e to <4 x i32>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr
+ %lsr.iv1820.c = bitcast ptr %lsr.iv.c to ptr
+ %lsr.iv17.d = bitcast ptr %lsr.iv.d to ptr
+ %lsr.cast.e = bitcast ptr %lsr.iv.e to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
- %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%sext.load.c = sext <4 x i16> %wide.masked.load.c to <4 x i32>
- %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%sext.load.d = sext <4 x i16> %wide.masked.load.d to <4 x i32>
%tmp12 = mul nsw <4 x i32> %tmp11, %tmp10
%mul.2 = mul nsw <4 x i32> %sext.load.c, %sext.load.d
%tmp13 = add <4 x i32> %tmp12, %mul.2
%tmp14 = add <4 x i32> %tmp13, %vec.phi
%vpnot = xor <4 x i1> %tmp8, <i1 true, i1 true, i1 true, i1 true>
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp14, <4 x i32>* %lsr.cast.e, i32 4, <4 x i1> %vpnot)
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4
- %scevgep.c = getelementptr i16, i16* %lsr.iv.c, i32 4
- %scevgep.d = getelementptr i16, i16* %lsr.iv.d, i32 4
- %scevgep.e = getelementptr i32, i32* %lsr.iv.e, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp14, ptr %lsr.cast.e, i32 4, <4 x i1> %vpnot)
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4
+ %scevgep.c = getelementptr i16, ptr %lsr.iv.c, i32 4
+ %scevgep.d = getelementptr i16, ptr %lsr.iv.d, i32 4
+ %scevgep.e = getelementptr i32, ptr %lsr.iv.e, i32 4
%tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp16 = icmp ne i32 %tmp15, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -62,8 +62,8 @@
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir
index 2a8aa845a06b7..d0716d8724957 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir
@@ -4,7 +4,7 @@
# Test that a VPNOT is not added to a max sized VPT block.
--- |
- define dso_local void @inloop_vpnot(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %c, i16* nocapture readonly %d, i32* nocapture %e, i32 %N) local_unnamed_addr #0 {
+ define dso_local void @inloop_vpnot(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %c, ptr nocapture readonly %d, ptr nocapture %e, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -21,39 +21,39 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv.e = phi i32* [ %scevgep.e, %vector.body ], [ %e, %vector.ph ]
- %lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
- %lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv.e = phi ptr [ %scevgep.e, %vector.body ], [ %e, %vector.ph ]
+ %lsr.iv.d = phi ptr [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
+ %lsr.iv.c = phi ptr [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp14, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>*
- %lsr.iv1820.c = bitcast i16* %lsr.iv.c to <4 x i16>*
- %lsr.iv17.d = bitcast i16* %lsr.iv.d to <4 x i16>*
- %lsr.cast.e = bitcast i32* %lsr.iv.e to <4 x i32>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr
+ %lsr.iv1820.c = bitcast ptr %lsr.iv.c to ptr
+ %lsr.iv17.d = bitcast ptr %lsr.iv.d to ptr
+ %lsr.cast.e = bitcast ptr %lsr.iv.e to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
- %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%sext.load.c = sext <4 x i16> %wide.masked.load.c to <4 x i32>
- %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%sext.load.d = sext <4 x i16> %wide.masked.load.d to <4 x i32>
%tmp12 = mul nsw <4 x i32> %tmp11, %tmp10
%mul.2 = mul nsw <4 x i32> %sext.load.c, %sext.load.d
%tmp13 = add <4 x i32> %tmp12, %mul.2
%tmp14 = add <4 x i32> %tmp13, %vec.phi
%vpnot = xor <4 x i1> %tmp8, <i1 true, i1 true, i1 true, i1 true>
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp14, <4 x i32>* %lsr.cast.e, i32 4, <4 x i1> %vpnot)
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4
- %scevgep.c = getelementptr i16, i16* %lsr.iv.c, i32 4
- %scevgep.d = getelementptr i16, i16* %lsr.iv.d, i32 4
- %scevgep.e = getelementptr i32, i32* %lsr.iv.e, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp14, ptr %lsr.cast.e, i32 4, <4 x i1> %vpnot)
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4
+ %scevgep.c = getelementptr i16, ptr %lsr.iv.c, i32 4
+ %scevgep.d = getelementptr i16, ptr %lsr.iv.d, i32 4
+ %scevgep.e = getelementptr i32, ptr %lsr.iv.e, i32 4
%tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp16 = icmp ne i32 %tmp15, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -62,8 +62,8 @@
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir
index 46a011d2e3003..2608276a423a9 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir
@@ -4,7 +4,7 @@
# General test for vpsel exclusion from tail predication
--- |
- define dso_local i32 @vpsel_after_vpt(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %c, i16* nocapture readonly %d, i32 %N) local_unnamed_addr #0 {
+ define dso_local i32 @vpsel_after_vpt(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %c, ptr nocapture readonly %d, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -21,35 +21,35 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
- %lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv.d = phi ptr [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
+ %lsr.iv.c = phi ptr [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp14, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>*
- %lsr.iv1820.c = bitcast i16* %lsr.iv.c to <4 x i16>*
- %lsr.iv17.d = bitcast i16* %lsr.iv.d to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr
+ %lsr.iv1820.c = bitcast ptr %lsr.iv.c to ptr
+ %lsr.iv17.d = bitcast ptr %lsr.iv.d to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
- %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%sext.load.c = sext <4 x i16> %wide.masked.load.c to <4 x i32>
- %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%sext.load.d = sext <4 x i16> %wide.masked.load.d to <4 x i32>
%tmp12 = mul nsw <4 x i32> %tmp11, %tmp10
%mul.2 = mul nsw <4 x i32> %sext.load.c, %sext.load.d
%tmp13 = add <4 x i32> %tmp12, %mul.2
%acc = add <4 x i32> %tmp13, %vec.phi
%tmp14 = select <4 x i1> %tmp8, <4 x i32> %acc, <4 x i32> %vec.phi
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4
- %scevgep.c = getelementptr i16, i16* %lsr.iv.c, i32 4
- %scevgep.d = getelementptr i16, i16* %lsr.iv.d, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4
+ %scevgep.c = getelementptr i16, ptr %lsr.iv.c, i32 4
+ %scevgep.d = getelementptr i16, ptr %lsr.iv.d, i32 4
%tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp16 = icmp ne i32 %tmp15, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -63,7 +63,7 @@
%res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp17, %middle.block ]
ret i32 %res.0.lcssa
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir
index dd9fc35b54e16..a28abae7c7633 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir
@@ -6,7 +6,7 @@
# the block.
--- |
- define dso_local i32 @vpsel_after_vpt(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %c, i16* nocapture readonly %d, i32 %N) local_unnamed_addr #0 {
+ define dso_local i32 @vpsel_after_vpt(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %c, ptr nocapture readonly %d, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -23,35 +23,35 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
- %lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv.d = phi ptr [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
+ %lsr.iv.c = phi ptr [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp14, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>*
- %lsr.iv1820.c = bitcast i16* %lsr.iv.c to <4 x i16>*
- %lsr.iv17.d = bitcast i16* %lsr.iv.d to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr
+ %lsr.iv1820.c = bitcast ptr %lsr.iv.c to ptr
+ %lsr.iv17.d = bitcast ptr %lsr.iv.d to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
- %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%sext.load.c = sext <4 x i16> %wide.masked.load.c to <4 x i32>
- %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%sext.load.d = sext <4 x i16> %wide.masked.load.d to <4 x i32>
%tmp12 = mul nsw <4 x i32> %tmp11, %tmp10
%mul.2 = mul nsw <4 x i32> %sext.load.c, %sext.load.d
%tmp13 = add <4 x i32> %tmp12, %mul.2
%acc = add <4 x i32> %tmp13, %vec.phi
%tmp14 = select <4 x i1> %tmp8, <4 x i32> %acc, <4 x i32> %vec.phi
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4
- %scevgep.c = getelementptr i16, i16* %lsr.iv.c, i32 4
- %scevgep.d = getelementptr i16, i16* %lsr.iv.d, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4
+ %scevgep.c = getelementptr i16, ptr %lsr.iv.c, i32 4
+ %scevgep.d = getelementptr i16, ptr %lsr.iv.d, i32 4
%tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp16 = icmp ne i32 %tmp15, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -65,7 +65,7 @@
%res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp17, %middle.block ]
ret i32 %res.0.lcssa
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir
index 2890b722b6b23..f4d6ce6297066 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define dso_local <4 x i32> @invariant_use_store(i16* nocapture readonly %a, i32* %c, i32 %N, <4 x i32> %pass) {
+ define dso_local <4 x i32> @invariant_use_store(ptr nocapture readonly %a, ptr %c, i32 %N, <4 x i32> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -19,21 +19,21 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv20 = phi i32* [ %scevgep20, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv20 = phi ptr [ %scevgep20, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.store = bitcast i32* %lsr.iv20 to <4 x i32>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.store = bitcast ptr %lsr.iv20 to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
%tmp12 = mul nsw <4 x i32> %pass, %tmp10
%tmp13 = add <4 x i32> %tmp12, %vec.phi
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp13, <4 x i32>* %lsr.store, i32 4, <4 x i1> %tmp8)
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep20 = getelementptr i32, i32* %lsr.iv20, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp13, ptr %lsr.store, i32 4, <4 x i1> %tmp8)
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep20 = getelementptr i32, ptr %lsr.iv20, i32 4
%tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp15 = icmp ne i32 %tmp14, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -43,7 +43,7 @@
ret <4 x i32> %pass
}
- define dso_local i32 @invariant_mul_use_reduce(i16* nocapture readonly %a, i32* %c, i32 %N, <4 x i32> %pass) {
+ define dso_local i32 @invariant_mul_use_reduce(ptr nocapture readonly %a, ptr %c, i32 %N, <4 x i32> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -60,16 +60,16 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
%tmp12 = mul nsw <4 x i32> %pass, %tmp10
%tmp13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp12)
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
%tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp16 = icmp ne i32 %tmp15, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -80,7 +80,7 @@
ret i32 %res
}
- define dso_local i32 @invariant_add_use_reduce(i16* nocapture readonly %a, i32* %c, i32 %N, <4 x i32> %pass) {
+ define dso_local i32 @invariant_add_use_reduce(ptr nocapture readonly %a, ptr %c, i32 %N, <4 x i32> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -97,16 +97,16 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
%tmp12 = add nsw <4 x i32> %pass, %tmp10
%tmp13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp12)
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
%tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp16 = icmp ne i32 %tmp15, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -118,8 +118,8 @@
}
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
index ae13493c4af83..b1749fdaad5a3 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define hidden arm_aapcs_vfpcc void @it_block_store_count_before_start(float* %pSrc, float* %pDst, i32 %blockSize, i32* %iter.addr) #0 {
+ define hidden arm_aapcs_vfpcc void @it_block_store_count_before_start(ptr %pSrc, ptr %pDst, i32 %blockSize, ptr %iter.addr) #0 {
entry:
%mul = shl i32 %blockSize, 1
%0 = add i32 %mul, 3
@@ -11,23 +11,23 @@
%2 = sub i32 %0, %smin
%3 = lshr i32 %2, 2
%4 = add nuw nsw i32 %3, 1
- store i32 %4, i32* %iter.addr, align 4
+ store i32 %4, ptr %iter.addr, align 4
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %do.body
do.body: ; preds = %do.body, %entry
%lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %start, %entry ]
%blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ]
- %pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ]
- %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
- %pDst.addr.01 = bitcast float* %pDst.addr.0 to <4 x float>*
- %pSrc.addr.02 = bitcast float* %pSrc.addr.0 to <4 x float>*
+ %pDst.addr.0 = phi ptr [ %pDst, %entry ], [ %add.ptr4, %do.body ]
+ %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ]
+ %pDst.addr.01 = bitcast ptr %pDst.addr.0 to ptr
+ %pSrc.addr.02 = bitcast ptr %pSrc.addr.0 to ptr
%5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
- %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef)
+ %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef)
%7 = fmul <4 x float> %6, %6
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %7, <4 x float>* %pDst.addr.01, i32 4, <4 x i1> %5)
- %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4
- %add.ptr4 = getelementptr inbounds float, float* %pDst.addr.0, i32 4
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %7, ptr %pDst.addr.01, i32 4, <4 x i1> %5)
+ %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4
+ %add.ptr4 = getelementptr inbounds float, ptr %pDst.addr.0, i32 4
%sub = add nsw i32 %blkCnt.0, -4
%8 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1)
%9 = icmp ne i32 %8, 0
@@ -38,7 +38,7 @@
ret void
}
- define hidden arm_aapcs_vfpcc void @it_block_store_count_after_start(float* %pSrc, float* %pDst, i32 %blockSize, i32* %iter.addr) #0 {
+ define hidden arm_aapcs_vfpcc void @it_block_store_count_after_start(ptr %pSrc, ptr %pDst, i32 %blockSize, ptr %iter.addr) #0 {
entry:
%mul = shl i32 %blockSize, 1
%0 = add i32 %mul, 3
@@ -48,22 +48,22 @@
%3 = lshr i32 %2, 2
%4 = add nuw nsw i32 %3, 1
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
- store i32 %4, i32* %iter.addr, align 4
+ store i32 %4, ptr %iter.addr, align 4
br label %do.body
do.body: ; preds = %do.body, %entry
%lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %start, %entry ]
%blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ]
- %pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ]
- %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
- %pDst.addr.01 = bitcast float* %pDst.addr.0 to <4 x float>*
- %pSrc.addr.02 = bitcast float* %pSrc.addr.0 to <4 x float>*
+ %pDst.addr.0 = phi ptr [ %pDst, %entry ], [ %add.ptr4, %do.body ]
+ %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ]
+ %pDst.addr.01 = bitcast ptr %pDst.addr.0 to ptr
+ %pSrc.addr.02 = bitcast ptr %pSrc.addr.0 to ptr
%5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
- %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef)
+ %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef)
%7 = fmul <4 x float> %6, %6
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %7, <4 x float>* %pDst.addr.01, i32 4, <4 x i1> %5)
- %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4
- %add.ptr4 = getelementptr inbounds float, float* %pDst.addr.0, i32 4
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %7, ptr %pDst.addr.01, i32 4, <4 x i1> %5)
+ %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4
+ %add.ptr4 = getelementptr inbounds float, ptr %pDst.addr.0, i32 4
%sub = add nsw i32 %blkCnt.0, -4
%8 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1)
%9 = icmp ne i32 %8, 0
@@ -78,10 +78,10 @@
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
; Function Attrs: argmemonly nounwind readonly willreturn
- declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2
+ declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #2
; Function Attrs: argmemonly nounwind willreturn writeonly
- declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>) #3
+ declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32 immarg, <4 x i1>) #3
; Function Attrs: noduplicate nounwind
declare i32 @llvm.start.loop.iterations.i32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir
index dbe0ac4a7ab78..f7154763e778f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir
@@ -4,7 +4,7 @@
# TODO: We should be able to handle the VCMP -> VPST -> VCMP -> VCTP case.
--- |
- define dso_local arm_aapcs_vfpcc void @test(i32* noalias nocapture %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+ define dso_local arm_aapcs_vfpcc void @test(ptr noalias nocapture %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -25,12 +25,12 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv3 = phi i32* [ %scevgep4, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv1 = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv3 = phi ptr [ %scevgep4, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%elts.rem = phi i32 [ %N, %vector.ph ], [ %elts.rem.next, %vector.body ]
- %lsr.iv12 = bitcast i32* %lsr.iv1 to <4 x i32>*
- %lsr.iv35 = bitcast i32* %lsr.iv3 to <4 x i32>*
+ %lsr.iv12 = bitcast ptr %lsr.iv1 to ptr
+ %lsr.iv35 = bitcast ptr %lsr.iv3 to ptr
%tmp7 = insertelement <4 x i32> undef, i32 %div, i32 0
%tmp8 = shufflevector <4 x i32> %tmp7, <4 x i32> undef, <4 x i32> zeroinitializer
%tmp9 = icmp ult <4 x i32> %vec.ind, %tmp8
@@ -38,12 +38,12 @@
%tmp10 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %elts.rem)
%tmp11 = and <4 x i1> %tmp9, %tmp10
%pred = and <4 x i1> %tmp11, %lower
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv35, i32 4, <4 x i1> %pred, <4 x i32> undef)
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %wide.masked.load, <4 x i32>* %lsr.iv12, i32 4, <4 x i1> %pred)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv35, i32 4, <4 x i1> %pred, <4 x i32> undef)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %wide.masked.load, ptr %lsr.iv12, i32 4, <4 x i1> %pred)
%vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
%elts.rem.next = sub i32 %elts.rem, 4
- %scevgep = getelementptr i32, i32* %lsr.iv1, i32 4
- %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv1, i32 4
+ %scevgep4 = getelementptr i32, ptr %lsr.iv3, i32 4
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv, -1
@@ -53,8 +53,8 @@
ret void
}
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir
index be8fd89e6cc8f..551cf31f8a9dc 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc void @test(i32* noalias nocapture %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+ define dso_local arm_aapcs_vfpcc void @test(ptr noalias nocapture %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -23,12 +23,12 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv3 = phi i32* [ %scevgep4, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv1 = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv3 = phi ptr [ %scevgep4, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%elts.rem = phi i32 [ %N, %vector.ph ], [ %elts.rem.next, %vector.body ]
- %lsr.iv12 = bitcast i32* %lsr.iv1 to <4 x i32>*
- %lsr.iv35 = bitcast i32* %lsr.iv3 to <4 x i32>*
+ %lsr.iv12 = bitcast ptr %lsr.iv1 to ptr
+ %lsr.iv35 = bitcast ptr %lsr.iv3 to ptr
%tmp7 = insertelement <4 x i32> undef, i32 %div, i32 0
%tmp8 = shufflevector <4 x i32> %tmp7, <4 x i32> undef, <4 x i32> zeroinitializer
%tmp9 = icmp ult <4 x i32> %vec.ind, %tmp8
@@ -36,12 +36,12 @@
%tmp10 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %elts.rem)
%tmp11 = and <4 x i1> %tmp9, %tmp10
%pred = and <4 x i1> %tmp11, %lower
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv35, i32 4, <4 x i1> %pred, <4 x i32> undef)
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %wide.masked.load, <4 x i32>* %lsr.iv12, i32 4, <4 x i1> %pred)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv35, i32 4, <4 x i1> %pred, <4 x i32> undef)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %wide.masked.load, ptr %lsr.iv12, i32 4, <4 x i1> %pred)
%vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
%elts.rem.next = sub i32 %elts.rem, 4
- %scevgep = getelementptr i32, i32* %lsr.iv1, i32 4
- %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv1, i32 4
+ %scevgep4 = getelementptr i32, ptr %lsr.iv3, i32 4
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv, -1
@@ -51,8 +51,8 @@
ret void
}
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir
index ec396187b92c9..5ede970f79848 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir
@@ -3,7 +3,7 @@
--- |
; Function Attrs: nofree norecurse nounwind
- define dso_local arm_aapcs_vfpcc void @test(i32* noalias nocapture %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+ define dso_local arm_aapcs_vfpcc void @test(ptr noalias nocapture %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -23,24 +23,24 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv3 = phi i32* [ %scevgep4, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv1 = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv3 = phi ptr [ %scevgep4, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%elts.rem = phi i32 [ %N, %vector.ph ], [ %elts.rem.next, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %12, %vector.body ]
- %lsr.iv35 = bitcast i32* %lsr.iv3 to <4 x i32>*
- %lsr.iv12 = bitcast i32* %lsr.iv1 to <4 x i32>*
+ %lsr.iv35 = bitcast ptr %lsr.iv3 to ptr
+ %lsr.iv12 = bitcast ptr %lsr.iv1 to ptr
%7 = insertelement <4 x i32> undef, i32 %div, i32 0
%8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer
%9 = icmp ult <4 x i32> %vec.ind, %8
%10 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %elts.rem)
%11 = and <4 x i1> %9, %10
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv35, i32 4, <4 x i1> %11, <4 x i32> undef)
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %wide.masked.load, <4 x i32>* %lsr.iv12, i32 4, <4 x i1> %11)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv35, i32 4, <4 x i1> %11, <4 x i32> undef)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %wide.masked.load, ptr %lsr.iv12, i32 4, <4 x i1> %11)
%vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
%elts.rem.next = sub i32 %elts.rem, 4
- %scevgep = getelementptr i32, i32* %lsr.iv1, i32 4
- %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv1, i32 4
+ %scevgep4 = getelementptr i32, ptr %lsr.iv3, i32 4
%12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%13 = icmp ne i32 %12, 0
br i1 %13, label %vector.body, label %for.cond.cleanup
@@ -49,8 +49,8 @@
ret void
}
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir
index 6322ddf615b35..1e0c546b81f36 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local <4 x i32> @exit_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32* %c, i32 %N, <4 x i32> %pass) {
+ define dso_local <4 x i32> @exit_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr %c, i32 %N, <4 x i32> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -19,26 +19,26 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv20 = phi i32* [ %scevgep20, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv20 = phi ptr [ %scevgep20, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.store = bitcast i32* %lsr.iv20 to <4 x i32>*
- %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.store = bitcast ptr %lsr.iv20 to ptr
+ %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
%tmp12 = mul nsw <4 x i32> %tmp11, %tmp10
%tmp13 = add <4 x i32> %tmp12, %vec.phi
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp13, <4 x i32>* %lsr.store, i32 4, <4 x i1> %tmp8)
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4
- %scevgep20 = getelementptr i32, i32* %lsr.iv20, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp13, ptr %lsr.store, i32 4, <4 x i1> %tmp8)
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4
+ %scevgep20 = getelementptr i32, ptr %lsr.iv20, i32 4
%tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp15 = icmp ne i32 %tmp14, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -48,8 +48,8 @@
ret <4 x i32> %pass
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
index 34c8a251e98d2..4278cfc01057f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcscc void @test1(i32* nocapture %arg, i32* nocapture readonly %arg1, i32* nocapture readonly %arg2, i32 %arg3) {
+ define dso_local arm_aapcscc void @test1(ptr nocapture %arg, ptr nocapture readonly %arg1, ptr nocapture readonly %arg2, i32 %arg3) {
bb:
%tmp = icmp eq i32 %arg3, 0
br i1 %tmp, label %bb27, label %bb4
@@ -28,15 +28,15 @@
br i1 %tmp15, label %bb27, label %bb16
bb16: ; preds = %bb13
- %tmp17 = getelementptr inbounds i32, i32* %arg1, i32 %tmp14
- %tmp18 = load i32, i32* %tmp17, align 4
- %tmp19 = getelementptr inbounds i32, i32* %arg2, i32 %tmp14
- %tmp20 = load i32, i32* %tmp19, align 4
+ %tmp17 = getelementptr inbounds i32, ptr %arg1, i32 %tmp14
+ %tmp18 = load i32, ptr %tmp17, align 4
+ %tmp19 = getelementptr inbounds i32, ptr %arg2, i32 %tmp14
+ %tmp20 = load i32, ptr %tmp19, align 4
%tmp21 = xor i32 %tmp20, %tmp18
- %tmp22 = getelementptr inbounds i32, i32* %arg, i32 %tmp14
- %tmp23 = load i32, i32* %tmp22, align 4
+ %tmp22 = getelementptr inbounds i32, ptr %arg, i32 %tmp14
+ %tmp23 = load i32, ptr %tmp22, align 4
%tmp24 = add nsw i32 %tmp23, %tmp21
- store i32 %tmp24, i32* %tmp22, align 4
+ store i32 %tmp24, ptr %tmp22, align 4
%tmp25 = add nuw i32 %tmp14, 1
%tmp26 = icmp eq i32 %tmp6, 1
br i1 %tmp26, label %bb27, label %bb57
@@ -48,69 +48,69 @@
%lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %start, %bb12 ]
%lsr.iv = phi i32 [ %lsr.iv.next, %bb28 ], [ 0, %bb12 ]
%tmp29 = phi i32 [ 0, %bb12 ], [ %tmp54, %bb28 ]
- %0 = bitcast i32* %arg1 to i8*
- %1 = bitcast i32* %arg2 to i8*
- %2 = bitcast i32* %arg to i8*
- %uglygep14 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep1415 = bitcast i8* %uglygep14 to i32*
- %scevgep617 = bitcast i32* %uglygep1415 to i32*
- %tmp34 = load i32, i32* %scevgep617, align 4
- %uglygep8 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep89 = bitcast i8* %uglygep8 to i32*
- %scevgep418 = bitcast i32* %uglygep89 to i32*
- %tmp35 = load i32, i32* %scevgep418, align 4
+ %0 = bitcast ptr %arg1 to ptr
+ %1 = bitcast ptr %arg2 to ptr
+ %2 = bitcast ptr %arg to ptr
+ %uglygep14 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep1415 = bitcast ptr %uglygep14 to ptr
+ %scevgep617 = bitcast ptr %uglygep1415 to ptr
+ %tmp34 = load i32, ptr %scevgep617, align 4
+ %uglygep8 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep89 = bitcast ptr %uglygep8 to ptr
+ %scevgep418 = bitcast ptr %uglygep89 to ptr
+ %tmp35 = load i32, ptr %scevgep418, align 4
%tmp36 = xor i32 %tmp35, %tmp34
- %uglygep2 = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep23 = bitcast i8* %uglygep2 to i32*
- %scevgep219 = bitcast i32* %uglygep23 to i32*
- %tmp37 = load i32, i32* %scevgep219, align 4
+ %uglygep2 = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep23 = bitcast ptr %uglygep2 to ptr
+ %scevgep219 = bitcast ptr %uglygep23 to ptr
+ %tmp37 = load i32, ptr %scevgep219, align 4
%tmp38 = add nsw i32 %tmp37, %tmp36
- store i32 %tmp38, i32* %scevgep219, align 4
- %uglygep33 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep3334 = bitcast i8* %uglygep33 to i32*
- %scevgep14 = getelementptr i32, i32* %uglygep3334, i32 1
- %tmp39 = load i32, i32* %scevgep14, align 4
- %uglygep27 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep2728 = bitcast i8* %uglygep27 to i32*
- %scevgep11 = getelementptr i32, i32* %uglygep2728, i32 1
- %tmp40 = load i32, i32* %scevgep11, align 4
+ store i32 %tmp38, ptr %scevgep219, align 4
+ %uglygep33 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep3334 = bitcast ptr %uglygep33 to ptr
+ %scevgep14 = getelementptr i32, ptr %uglygep3334, i32 1
+ %tmp39 = load i32, ptr %scevgep14, align 4
+ %uglygep27 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep2728 = bitcast ptr %uglygep27 to ptr
+ %scevgep11 = getelementptr i32, ptr %uglygep2728, i32 1
+ %tmp40 = load i32, ptr %scevgep11, align 4
%tmp41 = xor i32 %tmp40, %tmp39
- %uglygep20 = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep2021 = bitcast i8* %uglygep20 to i32*
- %scevgep9 = getelementptr i32, i32* %uglygep2021, i32 1
- %tmp42 = load i32, i32* %scevgep9, align 4
+ %uglygep20 = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep2021 = bitcast ptr %uglygep20 to ptr
+ %scevgep9 = getelementptr i32, ptr %uglygep2021, i32 1
+ %tmp42 = load i32, ptr %scevgep9, align 4
%tmp43 = add nsw i32 %tmp42, %tmp41
- store i32 %tmp43, i32* %scevgep9, align 4
- %uglygep30 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep3031 = bitcast i8* %uglygep30 to i32*
- %scevgep12 = getelementptr i32, i32* %uglygep3031, i32 2
- %tmp44 = load i32, i32* %scevgep12, align 4
- %uglygep24 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep2425 = bitcast i8* %uglygep24 to i32*
- %scevgep10 = getelementptr i32, i32* %uglygep2425, i32 2
- %tmp45 = load i32, i32* %scevgep10, align 4
+ store i32 %tmp43, ptr %scevgep9, align 4
+ %uglygep30 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep3031 = bitcast ptr %uglygep30 to ptr
+ %scevgep12 = getelementptr i32, ptr %uglygep3031, i32 2
+ %tmp44 = load i32, ptr %scevgep12, align 4
+ %uglygep24 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep2425 = bitcast ptr %uglygep24 to ptr
+ %scevgep10 = getelementptr i32, ptr %uglygep2425, i32 2
+ %tmp45 = load i32, ptr %scevgep10, align 4
%tmp46 = xor i32 %tmp45, %tmp44
- %uglygep17 = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep1718 = bitcast i8* %uglygep17 to i32*
- %scevgep8 = getelementptr i32, i32* %uglygep1718, i32 2
- %tmp47 = load i32, i32* %scevgep8, align 4
+ %uglygep17 = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep1718 = bitcast ptr %uglygep17 to ptr
+ %scevgep8 = getelementptr i32, ptr %uglygep1718, i32 2
+ %tmp47 = load i32, ptr %scevgep8, align 4
%tmp48 = add nsw i32 %tmp47, %tmp46
- store i32 %tmp48, i32* %scevgep8, align 4
- %uglygep11 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep1112 = bitcast i8* %uglygep11 to i32*
- %scevgep5 = getelementptr i32, i32* %uglygep1112, i32 3
- %tmp49 = load i32, i32* %scevgep5, align 4
- %uglygep5 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep56 = bitcast i8* %uglygep5 to i32*
- %scevgep3 = getelementptr i32, i32* %uglygep56, i32 3
- %tmp50 = load i32, i32* %scevgep3, align 4
+ store i32 %tmp48, ptr %scevgep8, align 4
+ %uglygep11 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep1112 = bitcast ptr %uglygep11 to ptr
+ %scevgep5 = getelementptr i32, ptr %uglygep1112, i32 3
+ %tmp49 = load i32, ptr %scevgep5, align 4
+ %uglygep5 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep56 = bitcast ptr %uglygep5 to ptr
+ %scevgep3 = getelementptr i32, ptr %uglygep56, i32 3
+ %tmp50 = load i32, ptr %scevgep3, align 4
%tmp51 = xor i32 %tmp50, %tmp49
- %uglygep = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep1 = bitcast i8* %uglygep to i32*
- %scevgep1 = getelementptr i32, i32* %uglygep1, i32 3
- %tmp52 = load i32, i32* %scevgep1, align 4
+ %uglygep = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep1 = bitcast ptr %uglygep to ptr
+ %scevgep1 = getelementptr i32, ptr %uglygep1, i32 3
+ %tmp52 = load i32, ptr %scevgep1, align 4
%tmp53 = add nsw i32 %tmp52, %tmp51
- store i32 %tmp53, i32* %scevgep1, align 4
+ store i32 %tmp53, ptr %scevgep1, align 4
%tmp54 = add nuw i32 %tmp29, 4
%lsr.iv.next = add i32 %lsr.iv, 16
%loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv15, i32 1)
@@ -119,29 +119,29 @@
br i1 %tmp56, label %bb28, label %bb13
bb57: ; preds = %bb16
- %tmp58 = getelementptr inbounds i32, i32* %arg1, i32 %tmp25
- %tmp59 = load i32, i32* %tmp58, align 4
- %tmp60 = getelementptr inbounds i32, i32* %arg2, i32 %tmp25
- %tmp61 = load i32, i32* %tmp60, align 4
+ %tmp58 = getelementptr inbounds i32, ptr %arg1, i32 %tmp25
+ %tmp59 = load i32, ptr %tmp58, align 4
+ %tmp60 = getelementptr inbounds i32, ptr %arg2, i32 %tmp25
+ %tmp61 = load i32, ptr %tmp60, align 4
%tmp62 = xor i32 %tmp61, %tmp59
- %tmp63 = getelementptr inbounds i32, i32* %arg, i32 %tmp25
- %tmp64 = load i32, i32* %tmp63, align 4
+ %tmp63 = getelementptr inbounds i32, ptr %arg, i32 %tmp25
+ %tmp64 = load i32, ptr %tmp63, align 4
%tmp65 = add nsw i32 %tmp64, %tmp62
- store i32 %tmp65, i32* %tmp63, align 4
+ store i32 %tmp65, ptr %tmp63, align 4
%tmp66 = add nuw i32 %tmp14, 2
%tmp67 = icmp eq i32 %tmp6, 2
br i1 %tmp67, label %bb27, label %bb68
bb68: ; preds = %bb57
- %tmp69 = getelementptr inbounds i32, i32* %arg1, i32 %tmp66
- %tmp70 = load i32, i32* %tmp69, align 4
- %tmp71 = getelementptr inbounds i32, i32* %arg2, i32 %tmp66
- %tmp72 = load i32, i32* %tmp71, align 4
+ %tmp69 = getelementptr inbounds i32, ptr %arg1, i32 %tmp66
+ %tmp70 = load i32, ptr %tmp69, align 4
+ %tmp71 = getelementptr inbounds i32, ptr %arg2, i32 %tmp66
+ %tmp72 = load i32, ptr %tmp71, align 4
%tmp73 = xor i32 %tmp72, %tmp70
- %tmp74 = getelementptr inbounds i32, i32* %arg, i32 %tmp66
- %tmp75 = load i32, i32* %tmp74, align 4
+ %tmp74 = getelementptr inbounds i32, ptr %arg, i32 %tmp66
+ %tmp75 = load i32, ptr %tmp74, align 4
%tmp76 = add nsw i32 %tmp75, %tmp73
- store i32 %tmp76, i32* %tmp74, align 4
+ store i32 %tmp76, ptr %tmp74, align 4
br label %bb27
}
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir
index 214eb488a7677..14c383f95f89f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcscc i32 @test1(i32* nocapture %arg, i32* nocapture readonly %arg1, i32* nocapture readonly %arg2, i32 %arg3) {
+ define dso_local arm_aapcscc i32 @test1(ptr nocapture %arg, ptr nocapture readonly %arg1, ptr nocapture readonly %arg2, i32 %arg3) {
bb:
%tmp = icmp eq i32 %arg3, 0
br i1 %tmp, label %bb27, label %bb4
@@ -28,15 +28,15 @@
br i1 %tmp15, label %bb27, label %bb16
bb16: ; preds = %bb13
- %tmp17 = getelementptr inbounds i32, i32* %arg1, i32 %tmp14
- %tmp18 = load i32, i32* %tmp17, align 4
- %tmp19 = getelementptr inbounds i32, i32* %arg2, i32 %tmp14
- %tmp20 = load i32, i32* %tmp19, align 4
+ %tmp17 = getelementptr inbounds i32, ptr %arg1, i32 %tmp14
+ %tmp18 = load i32, ptr %tmp17, align 4
+ %tmp19 = getelementptr inbounds i32, ptr %arg2, i32 %tmp14
+ %tmp20 = load i32, ptr %tmp19, align 4
%tmp21 = xor i32 %tmp20, %tmp18
- %tmp22 = getelementptr inbounds i32, i32* %arg, i32 %tmp14
- %tmp23 = load i32, i32* %tmp22, align 4
+ %tmp22 = getelementptr inbounds i32, ptr %arg, i32 %tmp14
+ %tmp23 = load i32, ptr %tmp22, align 4
%tmp24 = add nsw i32 %tmp23, %tmp21
- store i32 %tmp24, i32* %tmp22, align 4
+ store i32 %tmp24, ptr %tmp22, align 4
%tmp25 = add nuw i32 %tmp14, 1
%tmp26 = icmp eq i32 %tmp6, 1
br i1 %tmp26, label %bb27, label %bb57
@@ -49,69 +49,69 @@
%lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %start, %bb12 ]
%lsr.iv = phi i32 [ %lsr.iv.next, %bb28 ], [ 0, %bb12 ]
%tmp29 = phi i32 [ 0, %bb12 ], [ %tmp54, %bb28 ]
- %0 = bitcast i32* %arg1 to i8*
- %1 = bitcast i32* %arg2 to i8*
- %2 = bitcast i32* %arg to i8*
- %uglygep14 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep1415 = bitcast i8* %uglygep14 to i32*
- %scevgep617 = bitcast i32* %uglygep1415 to i32*
- %tmp34 = load i32, i32* %scevgep617, align 4
- %uglygep8 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep89 = bitcast i8* %uglygep8 to i32*
- %scevgep418 = bitcast i32* %uglygep89 to i32*
- %tmp35 = load i32, i32* %scevgep418, align 4
+ %0 = bitcast ptr %arg1 to ptr
+ %1 = bitcast ptr %arg2 to ptr
+ %2 = bitcast ptr %arg to ptr
+ %uglygep14 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep1415 = bitcast ptr %uglygep14 to ptr
+ %scevgep617 = bitcast ptr %uglygep1415 to ptr
+ %tmp34 = load i32, ptr %scevgep617, align 4
+ %uglygep8 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep89 = bitcast ptr %uglygep8 to ptr
+ %scevgep418 = bitcast ptr %uglygep89 to ptr
+ %tmp35 = load i32, ptr %scevgep418, align 4
%tmp36 = xor i32 %tmp35, %tmp34
- %uglygep2 = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep23 = bitcast i8* %uglygep2 to i32*
- %scevgep219 = bitcast i32* %uglygep23 to i32*
- %tmp37 = load i32, i32* %scevgep219, align 4
+ %uglygep2 = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep23 = bitcast ptr %uglygep2 to ptr
+ %scevgep219 = bitcast ptr %uglygep23 to ptr
+ %tmp37 = load i32, ptr %scevgep219, align 4
%tmp38 = add nsw i32 %tmp37, %tmp36
- store i32 %tmp38, i32* %scevgep219, align 4
- %uglygep33 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep3334 = bitcast i8* %uglygep33 to i32*
- %scevgep14 = getelementptr i32, i32* %uglygep3334, i32 1
- %tmp39 = load i32, i32* %scevgep14, align 4
- %uglygep27 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep2728 = bitcast i8* %uglygep27 to i32*
- %scevgep11 = getelementptr i32, i32* %uglygep2728, i32 1
- %tmp40 = load i32, i32* %scevgep11, align 4
+ store i32 %tmp38, ptr %scevgep219, align 4
+ %uglygep33 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep3334 = bitcast ptr %uglygep33 to ptr
+ %scevgep14 = getelementptr i32, ptr %uglygep3334, i32 1
+ %tmp39 = load i32, ptr %scevgep14, align 4
+ %uglygep27 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep2728 = bitcast ptr %uglygep27 to ptr
+ %scevgep11 = getelementptr i32, ptr %uglygep2728, i32 1
+ %tmp40 = load i32, ptr %scevgep11, align 4
%tmp41 = xor i32 %tmp40, %tmp39
- %uglygep20 = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep2021 = bitcast i8* %uglygep20 to i32*
- %scevgep9 = getelementptr i32, i32* %uglygep2021, i32 1
- %tmp42 = load i32, i32* %scevgep9, align 4
+ %uglygep20 = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep2021 = bitcast ptr %uglygep20 to ptr
+ %scevgep9 = getelementptr i32, ptr %uglygep2021, i32 1
+ %tmp42 = load i32, ptr %scevgep9, align 4
%tmp43 = add nsw i32 %tmp42, %tmp41
- store i32 %tmp43, i32* %scevgep9, align 4
- %uglygep30 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep3031 = bitcast i8* %uglygep30 to i32*
- %scevgep12 = getelementptr i32, i32* %uglygep3031, i32 2
- %tmp44 = load i32, i32* %scevgep12, align 4
- %uglygep24 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep2425 = bitcast i8* %uglygep24 to i32*
- %scevgep10 = getelementptr i32, i32* %uglygep2425, i32 2
- %tmp45 = load i32, i32* %scevgep10, align 4
+ store i32 %tmp43, ptr %scevgep9, align 4
+ %uglygep30 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep3031 = bitcast ptr %uglygep30 to ptr
+ %scevgep12 = getelementptr i32, ptr %uglygep3031, i32 2
+ %tmp44 = load i32, ptr %scevgep12, align 4
+ %uglygep24 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep2425 = bitcast ptr %uglygep24 to ptr
+ %scevgep10 = getelementptr i32, ptr %uglygep2425, i32 2
+ %tmp45 = load i32, ptr %scevgep10, align 4
%tmp46 = xor i32 %tmp45, %tmp44
- %uglygep17 = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep1718 = bitcast i8* %uglygep17 to i32*
- %scevgep8 = getelementptr i32, i32* %uglygep1718, i32 2
- %tmp47 = load i32, i32* %scevgep8, align 4
+ %uglygep17 = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep1718 = bitcast ptr %uglygep17 to ptr
+ %scevgep8 = getelementptr i32, ptr %uglygep1718, i32 2
+ %tmp47 = load i32, ptr %scevgep8, align 4
%tmp48 = add nsw i32 %tmp47, %tmp46
- store i32 %tmp48, i32* %scevgep8, align 4
- %uglygep11 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep1112 = bitcast i8* %uglygep11 to i32*
- %scevgep5 = getelementptr i32, i32* %uglygep1112, i32 3
- %tmp49 = load i32, i32* %scevgep5, align 4
- %uglygep5 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep56 = bitcast i8* %uglygep5 to i32*
- %scevgep3 = getelementptr i32, i32* %uglygep56, i32 3
- %tmp50 = load i32, i32* %scevgep3, align 4
+ store i32 %tmp48, ptr %scevgep8, align 4
+ %uglygep11 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep1112 = bitcast ptr %uglygep11 to ptr
+ %scevgep5 = getelementptr i32, ptr %uglygep1112, i32 3
+ %tmp49 = load i32, ptr %scevgep5, align 4
+ %uglygep5 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep56 = bitcast ptr %uglygep5 to ptr
+ %scevgep3 = getelementptr i32, ptr %uglygep56, i32 3
+ %tmp50 = load i32, ptr %scevgep3, align 4
%tmp51 = xor i32 %tmp50, %tmp49
- %uglygep = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep1 = bitcast i8* %uglygep to i32*
- %scevgep1 = getelementptr i32, i32* %uglygep1, i32 3
- %tmp52 = load i32, i32* %scevgep1, align 4
+ %uglygep = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep1 = bitcast ptr %uglygep to ptr
+ %scevgep1 = getelementptr i32, ptr %uglygep1, i32 3
+ %tmp52 = load i32, ptr %scevgep1, align 4
%tmp53 = add nsw i32 %tmp52, %tmp51
- store i32 %tmp53, i32* %scevgep1, align 4
+ store i32 %tmp53, ptr %scevgep1, align 4
%tmp54 = add nuw i32 %tmp29, 4
%lsr.iv.next = add i32 %lsr.iv, 16
%loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv15, i32 1)
@@ -120,29 +120,29 @@
br i1 %tmp56, label %bb28, label %bb13
bb57: ; preds = %bb16
- %tmp58 = getelementptr inbounds i32, i32* %arg1, i32 %tmp25
- %tmp59 = load i32, i32* %tmp58, align 4
- %tmp60 = getelementptr inbounds i32, i32* %arg2, i32 %tmp25
- %tmp61 = load i32, i32* %tmp60, align 4
+ %tmp58 = getelementptr inbounds i32, ptr %arg1, i32 %tmp25
+ %tmp59 = load i32, ptr %tmp58, align 4
+ %tmp60 = getelementptr inbounds i32, ptr %arg2, i32 %tmp25
+ %tmp61 = load i32, ptr %tmp60, align 4
%tmp62 = xor i32 %tmp61, %tmp59
- %tmp63 = getelementptr inbounds i32, i32* %arg, i32 %tmp25
- %tmp64 = load i32, i32* %tmp63, align 4
+ %tmp63 = getelementptr inbounds i32, ptr %arg, i32 %tmp25
+ %tmp64 = load i32, ptr %tmp63, align 4
%tmp65 = add nsw i32 %tmp64, %tmp62
- store i32 %tmp65, i32* %tmp63, align 4
+ store i32 %tmp65, ptr %tmp63, align 4
%tmp66 = add nuw i32 %tmp14, 2
%tmp67 = icmp eq i32 %tmp6, 2
br i1 %tmp67, label %bb27, label %bb68
bb68: ; preds = %bb57
- %tmp69 = getelementptr inbounds i32, i32* %arg1, i32 %tmp66
- %tmp70 = load i32, i32* %tmp69, align 4
- %tmp71 = getelementptr inbounds i32, i32* %arg2, i32 %tmp66
- %tmp72 = load i32, i32* %tmp71, align 4
+ %tmp69 = getelementptr inbounds i32, ptr %arg1, i32 %tmp66
+ %tmp70 = load i32, ptr %tmp69, align 4
+ %tmp71 = getelementptr inbounds i32, ptr %arg2, i32 %tmp66
+ %tmp72 = load i32, ptr %tmp71, align 4
%tmp73 = xor i32 %tmp72, %tmp70
- %tmp74 = getelementptr inbounds i32, i32* %arg, i32 %tmp66
- %tmp75 = load i32, i32* %tmp74, align 4
+ %tmp74 = getelementptr inbounds i32, ptr %arg, i32 %tmp66
+ %tmp75 = load i32, ptr %tmp74, align 4
%tmp76 = add nsw i32 %tmp75, %tmp73
- store i32 %tmp76, i32* %tmp74, align 4
+ store i32 %tmp76, ptr %tmp74, align 4
br label %bb27
}
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir
index 8a1a3ed66c793..98f6dbc486d46 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcscc i32 @test1(i32* nocapture %arg, i32* nocapture readonly %arg1, i32* nocapture readonly %arg2, i32 %arg3) {
+ define dso_local arm_aapcscc i32 @test1(ptr nocapture %arg, ptr nocapture readonly %arg1, ptr nocapture readonly %arg2, i32 %arg3) {
bb:
%tmp = icmp eq i32 %arg3, 0
br i1 %tmp, label %bb27, label %bb4
@@ -28,15 +28,15 @@
br i1 %tmp15, label %bb27, label %bb16
bb16: ; preds = %bb13
- %tmp17 = getelementptr inbounds i32, i32* %arg1, i32 %tmp14
- %tmp18 = load i32, i32* %tmp17, align 4
- %tmp19 = getelementptr inbounds i32, i32* %arg2, i32 %tmp14
- %tmp20 = load i32, i32* %tmp19, align 4
+ %tmp17 = getelementptr inbounds i32, ptr %arg1, i32 %tmp14
+ %tmp18 = load i32, ptr %tmp17, align 4
+ %tmp19 = getelementptr inbounds i32, ptr %arg2, i32 %tmp14
+ %tmp20 = load i32, ptr %tmp19, align 4
%tmp21 = xor i32 %tmp20, %tmp18
- %tmp22 = getelementptr inbounds i32, i32* %arg, i32 %tmp14
- %tmp23 = load i32, i32* %tmp22, align 4
+ %tmp22 = getelementptr inbounds i32, ptr %arg, i32 %tmp14
+ %tmp23 = load i32, ptr %tmp22, align 4
%tmp24 = add nsw i32 %tmp23, %tmp21
- store i32 %tmp24, i32* %tmp22, align 4
+ store i32 %tmp24, ptr %tmp22, align 4
%tmp25 = add nuw i32 %tmp14, 1
%tmp26 = icmp eq i32 %tmp6, 1
br i1 %tmp26, label %bb27, label %bb57
@@ -49,69 +49,69 @@
%lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %start, %bb12 ]
%lsr.iv = phi i32 [ %lsr.iv.next, %bb28 ], [ 0, %bb12 ]
%tmp29 = phi i32 [ 0, %bb12 ], [ %tmp54, %bb28 ]
- %0 = bitcast i32* %arg1 to i8*
- %1 = bitcast i32* %arg2 to i8*
- %2 = bitcast i32* %arg to i8*
- %uglygep14 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep1415 = bitcast i8* %uglygep14 to i32*
- %scevgep617 = bitcast i32* %uglygep1415 to i32*
- %tmp34 = load i32, i32* %scevgep617, align 4
- %uglygep8 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep89 = bitcast i8* %uglygep8 to i32*
- %scevgep418 = bitcast i32* %uglygep89 to i32*
- %tmp35 = load i32, i32* %scevgep418, align 4
+ %0 = bitcast ptr %arg1 to ptr
+ %1 = bitcast ptr %arg2 to ptr
+ %2 = bitcast ptr %arg to ptr
+ %uglygep14 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep1415 = bitcast ptr %uglygep14 to ptr
+ %scevgep617 = bitcast ptr %uglygep1415 to ptr
+ %tmp34 = load i32, ptr %scevgep617, align 4
+ %uglygep8 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep89 = bitcast ptr %uglygep8 to ptr
+ %scevgep418 = bitcast ptr %uglygep89 to ptr
+ %tmp35 = load i32, ptr %scevgep418, align 4
%tmp36 = xor i32 %tmp35, %tmp34
- %uglygep2 = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep23 = bitcast i8* %uglygep2 to i32*
- %scevgep219 = bitcast i32* %uglygep23 to i32*
- %tmp37 = load i32, i32* %scevgep219, align 4
+ %uglygep2 = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep23 = bitcast ptr %uglygep2 to ptr
+ %scevgep219 = bitcast ptr %uglygep23 to ptr
+ %tmp37 = load i32, ptr %scevgep219, align 4
%tmp38 = add nsw i32 %tmp37, %tmp36
- store i32 %tmp38, i32* %scevgep219, align 4
- %uglygep33 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep3334 = bitcast i8* %uglygep33 to i32*
- %scevgep14 = getelementptr i32, i32* %uglygep3334, i32 1
- %tmp39 = load i32, i32* %scevgep14, align 4
- %uglygep27 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep2728 = bitcast i8* %uglygep27 to i32*
- %scevgep11 = getelementptr i32, i32* %uglygep2728, i32 1
- %tmp40 = load i32, i32* %scevgep11, align 4
+ store i32 %tmp38, ptr %scevgep219, align 4
+ %uglygep33 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep3334 = bitcast ptr %uglygep33 to ptr
+ %scevgep14 = getelementptr i32, ptr %uglygep3334, i32 1
+ %tmp39 = load i32, ptr %scevgep14, align 4
+ %uglygep27 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep2728 = bitcast ptr %uglygep27 to ptr
+ %scevgep11 = getelementptr i32, ptr %uglygep2728, i32 1
+ %tmp40 = load i32, ptr %scevgep11, align 4
%tmp41 = xor i32 %tmp40, %tmp39
- %uglygep20 = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep2021 = bitcast i8* %uglygep20 to i32*
- %scevgep9 = getelementptr i32, i32* %uglygep2021, i32 1
- %tmp42 = load i32, i32* %scevgep9, align 4
+ %uglygep20 = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep2021 = bitcast ptr %uglygep20 to ptr
+ %scevgep9 = getelementptr i32, ptr %uglygep2021, i32 1
+ %tmp42 = load i32, ptr %scevgep9, align 4
%tmp43 = add nsw i32 %tmp42, %tmp41
- store i32 %tmp43, i32* %scevgep9, align 4
- %uglygep30 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep3031 = bitcast i8* %uglygep30 to i32*
- %scevgep12 = getelementptr i32, i32* %uglygep3031, i32 2
- %tmp44 = load i32, i32* %scevgep12, align 4
- %uglygep24 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep2425 = bitcast i8* %uglygep24 to i32*
- %scevgep10 = getelementptr i32, i32* %uglygep2425, i32 2
- %tmp45 = load i32, i32* %scevgep10, align 4
+ store i32 %tmp43, ptr %scevgep9, align 4
+ %uglygep30 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep3031 = bitcast ptr %uglygep30 to ptr
+ %scevgep12 = getelementptr i32, ptr %uglygep3031, i32 2
+ %tmp44 = load i32, ptr %scevgep12, align 4
+ %uglygep24 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep2425 = bitcast ptr %uglygep24 to ptr
+ %scevgep10 = getelementptr i32, ptr %uglygep2425, i32 2
+ %tmp45 = load i32, ptr %scevgep10, align 4
%tmp46 = xor i32 %tmp45, %tmp44
- %uglygep17 = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep1718 = bitcast i8* %uglygep17 to i32*
- %scevgep8 = getelementptr i32, i32* %uglygep1718, i32 2
- %tmp47 = load i32, i32* %scevgep8, align 4
+ %uglygep17 = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep1718 = bitcast ptr %uglygep17 to ptr
+ %scevgep8 = getelementptr i32, ptr %uglygep1718, i32 2
+ %tmp47 = load i32, ptr %scevgep8, align 4
%tmp48 = add nsw i32 %tmp47, %tmp46
- store i32 %tmp48, i32* %scevgep8, align 4
- %uglygep11 = getelementptr i8, i8* %0, i32 %lsr.iv
- %uglygep1112 = bitcast i8* %uglygep11 to i32*
- %scevgep5 = getelementptr i32, i32* %uglygep1112, i32 3
- %tmp49 = load i32, i32* %scevgep5, align 4
- %uglygep5 = getelementptr i8, i8* %1, i32 %lsr.iv
- %uglygep56 = bitcast i8* %uglygep5 to i32*
- %scevgep3 = getelementptr i32, i32* %uglygep56, i32 3
- %tmp50 = load i32, i32* %scevgep3, align 4
+ store i32 %tmp48, ptr %scevgep8, align 4
+ %uglygep11 = getelementptr i8, ptr %0, i32 %lsr.iv
+ %uglygep1112 = bitcast ptr %uglygep11 to ptr
+ %scevgep5 = getelementptr i32, ptr %uglygep1112, i32 3
+ %tmp49 = load i32, ptr %scevgep5, align 4
+ %uglygep5 = getelementptr i8, ptr %1, i32 %lsr.iv
+ %uglygep56 = bitcast ptr %uglygep5 to ptr
+ %scevgep3 = getelementptr i32, ptr %uglygep56, i32 3
+ %tmp50 = load i32, ptr %scevgep3, align 4
%tmp51 = xor i32 %tmp50, %tmp49
- %uglygep = getelementptr i8, i8* %2, i32 %lsr.iv
- %uglygep1 = bitcast i8* %uglygep to i32*
- %scevgep1 = getelementptr i32, i32* %uglygep1, i32 3
- %tmp52 = load i32, i32* %scevgep1, align 4
+ %uglygep = getelementptr i8, ptr %2, i32 %lsr.iv
+ %uglygep1 = bitcast ptr %uglygep to ptr
+ %scevgep1 = getelementptr i32, ptr %uglygep1, i32 3
+ %tmp52 = load i32, ptr %scevgep1, align 4
%tmp53 = add nsw i32 %tmp52, %tmp51
- store i32 %tmp53, i32* %scevgep1, align 4
+ store i32 %tmp53, ptr %scevgep1, align 4
%tmp54 = add nuw i32 %tmp29, 4
%lsr.iv.next = add i32 %lsr.iv, 16
%loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv15, i32 1)
@@ -120,29 +120,29 @@
br i1 %tmp56, label %bb28, label %bb13
bb57: ; preds = %bb16
- %tmp58 = getelementptr inbounds i32, i32* %arg1, i32 %tmp25
- %tmp59 = load i32, i32* %tmp58, align 4
- %tmp60 = getelementptr inbounds i32, i32* %arg2, i32 %tmp25
- %tmp61 = load i32, i32* %tmp60, align 4
+ %tmp58 = getelementptr inbounds i32, ptr %arg1, i32 %tmp25
+ %tmp59 = load i32, ptr %tmp58, align 4
+ %tmp60 = getelementptr inbounds i32, ptr %arg2, i32 %tmp25
+ %tmp61 = load i32, ptr %tmp60, align 4
%tmp62 = xor i32 %tmp61, %tmp59
- %tmp63 = getelementptr inbounds i32, i32* %arg, i32 %tmp25
- %tmp64 = load i32, i32* %tmp63, align 4
+ %tmp63 = getelementptr inbounds i32, ptr %arg, i32 %tmp25
+ %tmp64 = load i32, ptr %tmp63, align 4
%tmp65 = add nsw i32 %tmp64, %tmp62
- store i32 %tmp65, i32* %tmp63, align 4
+ store i32 %tmp65, ptr %tmp63, align 4
%tmp66 = add nuw i32 %tmp14, 2
%tmp67 = icmp eq i32 %tmp6, 2
br i1 %tmp67, label %bb27, label %bb68
bb68: ; preds = %bb57
- %tmp69 = getelementptr inbounds i32, i32* %arg1, i32 %tmp66
- %tmp70 = load i32, i32* %tmp69, align 4
- %tmp71 = getelementptr inbounds i32, i32* %arg2, i32 %tmp66
- %tmp72 = load i32, i32* %tmp71, align 4
+ %tmp69 = getelementptr inbounds i32, ptr %arg1, i32 %tmp66
+ %tmp70 = load i32, ptr %tmp69, align 4
+ %tmp71 = getelementptr inbounds i32, ptr %arg2, i32 %tmp66
+ %tmp72 = load i32, ptr %tmp71, align 4
%tmp73 = xor i32 %tmp72, %tmp70
- %tmp74 = getelementptr inbounds i32, i32* %arg, i32 %tmp66
- %tmp75 = load i32, i32* %tmp74, align 4
+ %tmp74 = getelementptr inbounds i32, ptr %arg, i32 %tmp66
+ %tmp75 = load i32, ptr %tmp74, align 4
%tmp76 = add nsw i32 %tmp75, %tmp73
- store i32 %tmp76, i32* %tmp74, align 4
+ store i32 %tmp76, ptr %tmp74, align 4
br label %bb27
}
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
index c8d03fdb68a3e..d1374679f3206 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc float @insert_after_vdup_1(float* nocapture readonly %a, float* nocapture readonly %b, float %init, i32 %N) {
+ define dso_local arm_aapcs_vfpcc float @insert_after_vdup_1(ptr nocapture readonly %a, ptr nocapture readonly %b, float %init, i32 %N) {
entry:
%cmp8.not = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -19,22 +19,22 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv13 = phi float* [ %scevgep14, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi float* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv13 = phi ptr [ %scevgep14, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x float> [ %6, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %start, %vector.ph ], [ %14, %vector.body ]
%8 = phi i32 [ %N, %vector.ph ], [ %10, %vector.body ]
- %lsr.iv12 = bitcast float* %lsr.iv to <4 x float>*
- %lsr.iv1315 = bitcast float* %lsr.iv13 to <4 x float>*
+ %lsr.iv12 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1315 = bitcast ptr %lsr.iv13 to ptr
%9 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %8)
%10 = sub i32 %8, 4
- %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv12, i32 4, <4 x i1> %9, <4 x float> undef)
- %wide.masked.load11 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv1315, i32 4, <4 x i1> %9, <4 x float> undef)
+ %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv12, i32 4, <4 x i1> %9, <4 x float> undef)
+ %wide.masked.load11 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv1315, i32 4, <4 x i1> %9, <4 x float> undef)
%11 = fmul fast <4 x float> %wide.masked.load11, %wide.masked.load
%12 = fadd fast <4 x float> %11, %vec.phi
%13 = select <4 x i1> %9, <4 x float> %12, <4 x float> %vec.phi
- %scevgep = getelementptr float, float* %lsr.iv, i32 4
- %scevgep14 = getelementptr float, float* %lsr.iv13, i32 4
+ %scevgep = getelementptr float, ptr %lsr.iv, i32 4
+ %scevgep14 = getelementptr float, ptr %lsr.iv13, i32 4
%14 = call i32 @llvm.loop.decrement.reg.i32(i32 %7, i32 1)
%15 = icmp ne i32 %14, 0
br i1 %15, label %vector.body, label %middle.block
@@ -49,7 +49,7 @@
}
; Function Attrs: norecurse nounwind readonly
- define dso_local arm_aapcs_vfpcc float @insert_after_vdup_2(float* nocapture readonly %a, float* nocapture readonly %b, float %init, i32 %N) local_unnamed_addr #0 {
+ define dso_local arm_aapcs_vfpcc float @insert_after_vdup_2(ptr nocapture readonly %a, ptr nocapture readonly %b, float %init, i32 %N) local_unnamed_addr #0 {
entry:
%shr = lshr i32 %N, 2
%cmp9.not = icmp eq i32 %shr, 0
@@ -67,22 +67,22 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv14 = phi float* [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi float* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x float> [ %6, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %start, %vector.ph ], [ %14, %vector.body ]
%8 = phi i32 [ %shr, %vector.ph ], [ %10, %vector.body ]
- %lsr.iv13 = bitcast float* %lsr.iv to <4 x float>*
- %lsr.iv1416 = bitcast float* %lsr.iv14 to <4 x float>*
+ %lsr.iv13 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr
%9 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %8)
%10 = sub i32 %8, 4
- %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv13, i32 4, <4 x i1> %9, <4 x float> undef)
- %wide.masked.load12 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv1416, i32 4, <4 x i1> %9, <4 x float> undef)
+ %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %9, <4 x float> undef)
+ %wide.masked.load12 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %9, <4 x float> undef)
%11 = fmul fast <4 x float> %wide.masked.load12, %wide.masked.load
%12 = fadd fast <4 x float> %11, %vec.phi
%13 = select <4 x i1> %9, <4 x float> %12, <4 x float> %vec.phi
- %scevgep = getelementptr float, float* %lsr.iv, i32 4
- %scevgep15 = getelementptr float, float* %lsr.iv14, i32 4
+ %scevgep = getelementptr float, ptr %lsr.iv, i32 4
+ %scevgep15 = getelementptr float, ptr %lsr.iv14, i32 4
%14 = call i32 @llvm.loop.decrement.reg.i32(i32 %7, i32 1)
%15 = icmp ne i32 %14, 0
br i1 %15, label %vector.body, label %middle.block
@@ -97,7 +97,7 @@
}
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
- declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
+ declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>)
declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir
index 3a0bc9d50a1d9..8cf8589041e38 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir
@@ -2,14 +2,14 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define dso_local arm_aapcscc void @test_debug(i32 %d, i32* %e, i16* nocapture readonly %k, i16* nocapture readonly %l) !dbg !15 {
+ define dso_local arm_aapcscc void @test_debug(i32 %d, ptr %e, ptr nocapture readonly %k, ptr nocapture readonly %l) !dbg !15 {
entry:
call void @llvm.dbg.value(metadata i32 %d, metadata !23, metadata !DIExpression()), !dbg !32
- call void @llvm.dbg.value(metadata i32* %e, metadata !24, metadata !DIExpression()), !dbg !32
- call void @llvm.dbg.value(metadata i16* %k, metadata !25, metadata !DIExpression()), !dbg !32
- call void @llvm.dbg.value(metadata i16* %l, metadata !26, metadata !DIExpression()), !dbg !32
+ call void @llvm.dbg.value(metadata ptr %e, metadata !24, metadata !DIExpression()), !dbg !32
+ call void @llvm.dbg.value(metadata ptr %k, metadata !25, metadata !DIExpression()), !dbg !32
+ call void @llvm.dbg.value(metadata ptr %l, metadata !26, metadata !DIExpression()), !dbg !32
call void @llvm.dbg.value(metadata i16 0, metadata !29, metadata !DIExpression()), !dbg !32
- %call = tail call arm_aapcscc signext i16 @get_input(i32 %d, i32* %e, i16 signext 0) #4, !dbg !33
+ %call = tail call arm_aapcscc signext i16 @get_input(i32 %d, ptr %e, i16 signext 0) #4, !dbg !33
call void @llvm.dbg.value(metadata i16 %call, metadata !28, metadata !DIExpression()), !dbg !32
call void @llvm.dbg.value(metadata i32 0, metadata !30, metadata !DIExpression()), !dbg !32
%cmp30 = icmp sgt i32 %d, 0, !dbg !34
@@ -20,52 +20,52 @@
br label %for.cond1.preheader.us, !dbg !37
for.cond1.preheader.us: ; preds = %for.cond1.preheader.us.preheader, %for.cond1.for.inc9_crit_edge.us
- %lsr.iv2 = phi i16* [ %k, %for.cond1.preheader.us.preheader ], [ %9, %for.cond1.for.inc9_crit_edge.us ]
+ %lsr.iv2 = phi ptr [ %k, %for.cond1.preheader.us.preheader ], [ %9, %for.cond1.for.inc9_crit_edge.us ]
%i.031.us = phi i32 [ %inc10.us, %for.cond1.for.inc9_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ]
call void @llvm.dbg.value(metadata i32 %i.031.us, metadata !30, metadata !DIExpression()), !dbg !32
call void @llvm.dbg.value(metadata i32 0, metadata !31, metadata !DIExpression()), !dbg !32
- %arrayidx7.us = getelementptr inbounds i32, i32* %e, i32 %i.031.us, !dbg !38
- %arrayidx7.promoted.us = load i32, i32* %arrayidx7.us, align 4, !dbg !41
+ %arrayidx7.us = getelementptr inbounds i32, ptr %e, i32 %i.031.us, !dbg !38
+ %arrayidx7.promoted.us = load i32, ptr %arrayidx7.us, align 4, !dbg !41
%start = call i32 @llvm.start.loop.iterations.i32(i32 %d), !dbg !46
br label %for.body3.us, !dbg !46
for.body3.us: ; preds = %for.body3.us, %for.cond1.preheader.us
- %lsr.iv5 = phi i16* [ %scevgep6, %for.body3.us ], [ %lsr.iv2, %for.cond1.preheader.us ], !dbg !32
- %lsr.iv1 = phi i16* [ %scevgep, %for.body3.us ], [ %l, %for.cond1.preheader.us ], !dbg !32
+ %lsr.iv5 = phi ptr [ %scevgep6, %for.body3.us ], [ %lsr.iv2, %for.cond1.preheader.us ], !dbg !32
+ %lsr.iv1 = phi ptr [ %scevgep, %for.body3.us ], [ %l, %for.cond1.preheader.us ], !dbg !32
%add829.us = phi i32 [ %arrayidx7.promoted.us, %for.cond1.preheader.us ], [ %add8.us, %for.body3.us ], !dbg !32
%1 = phi i32 [ %start, %for.cond1.preheader.us ], [ %4, %for.body3.us ], !dbg !32
call void @llvm.dbg.value(metadata i32 undef, metadata !31, metadata !DIExpression()), !dbg !32
- %2 = load i16, i16* %lsr.iv5, align 2, !dbg !47
+ %2 = load i16, ptr %lsr.iv5, align 2, !dbg !47
%conv.us = sext i16 %2 to i32, !dbg !47
- %3 = load i16, i16* %lsr.iv1, align 2, !dbg !50
+ %3 = load i16, ptr %lsr.iv1, align 2, !dbg !50
%conv5.us = sext i16 %3 to i32, !dbg !50
%mul6.us = mul nsw i32 %conv5.us, %conv.us, !dbg !51
%add8.us = add nsw i32 %mul6.us, %add829.us, !dbg !41
call void @llvm.dbg.value(metadata i32 undef, metadata !31, metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !32
- %scevgep = getelementptr i16, i16* %lsr.iv1, i32 1, !dbg !52
- %scevgep6 = getelementptr i16, i16* %lsr.iv5, i32 1, !dbg !52
+ %scevgep = getelementptr i16, ptr %lsr.iv1, i32 1, !dbg !52
+ %scevgep6 = getelementptr i16, ptr %lsr.iv5, i32 1, !dbg !52
%4 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1), !dbg !46
%5 = icmp ne i32 %4, 0, !dbg !46
br i1 %5, label %for.body3.us, label %for.cond1.for.inc9_crit_edge.us, !dbg !46, !llvm.loop !53
for.cond1.for.inc9_crit_edge.us: ; preds = %for.body3.us
- %6 = bitcast i16* %lsr.iv2 to i1*
+ %6 = bitcast ptr %lsr.iv2 to ptr
%sunkaddr = mul i32 %i.031.us, 4, !dbg !41
- %7 = bitcast i32* %e to i8*, !dbg !41
- %sunkaddr7 = getelementptr inbounds i8, i8* %7, i32 %sunkaddr, !dbg !41
- %8 = bitcast i8* %sunkaddr7 to i32*, !dbg !41
- store i32 %add8.us, i32* %8, align 4, !dbg !41
+ %7 = bitcast ptr %e to ptr, !dbg !41
+ %sunkaddr7 = getelementptr inbounds i8, ptr %7, i32 %sunkaddr, !dbg !41
+ %8 = bitcast ptr %sunkaddr7 to ptr, !dbg !41
+ store i32 %add8.us, ptr %8, align 4, !dbg !41
%inc10.us = add nuw nsw i32 %i.031.us, 1, !dbg !55
call void @llvm.dbg.value(metadata i32 %inc10.us, metadata !30, metadata !DIExpression()), !dbg !32
- %scevgep4 = getelementptr i1, i1* %6, i32 %0, !dbg !37
- %9 = bitcast i1* %scevgep4 to i16*, !dbg !37
+ %scevgep4 = getelementptr i1, ptr %6, i32 %0, !dbg !37
+ %9 = bitcast ptr %scevgep4 to ptr, !dbg !37
%exitcond33 = icmp eq i32 %inc10.us, %d, !dbg !34
br i1 %exitcond33, label %for.end11, label %for.cond1.preheader.us, !dbg !37, !llvm.loop !56
for.end11: ; preds = %for.cond1.for.inc9_crit_edge.us, %entry
ret void, !dbg !58
}
- declare !dbg !4 dso_local arm_aapcscc signext i16 @get_input(i32, i32*, i16 signext)
+ declare !dbg !4 dso_local arm_aapcscc signext i16 @get_input(i32, ptr, i16 signext)
declare void @llvm.dbg.value(metadata, metadata, metadata)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir
index 070a20734c2eb..4d3f2e2b10c6f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir
@@ -4,7 +4,7 @@
# A decent sized test to handle a matrix, with scalar and vector low-overhead loops.
--- |
- define dso_local arm_aapcs_vfpcc signext i16 @matrix_test(i32 %d, i32* nocapture %e, i16* nocapture readonly %k, i16* nocapture readonly %l) {
+ define dso_local arm_aapcs_vfpcc signext i16 @matrix_test(i32 %d, ptr nocapture %e, ptr nocapture readonly %k, ptr nocapture readonly %l) {
entry:
%cmp19.i = icmp sgt i32 %d, 0
br i1 %cmp19.i, label %for.body.i.preheader, label %c.exit.thread
@@ -14,16 +14,16 @@
br label %for.body.i
c.exit.thread: ; preds = %entry
- %call169 = tail call arm_aapcs_vfpcc signext i16 bitcast (i16 (...)* @crc16 to i16 (i32)*)(i32 0)
+ %call169 = tail call arm_aapcs_vfpcc signext i16 @crc16(i32 0)
%conv270 = sext i16 %call169 to i32
br label %c.exit59
for.body.i: ; preds = %for.body.i, %for.body.i.preheader
- %lsr.iv15 = phi i32* [ %e, %for.body.i.preheader ], [ %scevgep16, %for.body.i ]
+ %lsr.iv15 = phi ptr [ %e, %for.body.i.preheader ], [ %scevgep16, %for.body.i ]
%h.022.i = phi i16 [ %h.1.i, %for.body.i ], [ 0, %for.body.i.preheader ]
%f.020.i = phi i32 [ %f.1.i, %for.body.i ], [ undef, %for.body.i.preheader ]
%0 = phi i32 [ %start1, %for.body.i.preheader ], [ %2, %for.body.i ]
- %1 = load i32, i32* %lsr.iv15, align 4
+ %1 = load i32, ptr %lsr.iv15, align 4
%add.i = add nsw i32 %1, %f.020.i
%cmp1.i = icmp sgt i32 %add.i, 0
%cmp3.i = icmp sgt i32 %1, 0
@@ -31,7 +31,7 @@
%narrow.i = and i1 %cmp3.i, %cmp1.i
%add6.i = zext i1 %narrow.i to i16
%h.1.i = add i16 %h.022.i, %add6.i
- %scevgep16 = getelementptr i32, i32* %lsr.iv15, i32 1
+ %scevgep16 = getelementptr i32, ptr %lsr.iv15, i32 1
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%3 = icmp ne i32 %2, 0
br i1 %3, label %for.body.i, label %c.exit
@@ -39,7 +39,7 @@
c.exit: ; preds = %for.body.i
%4 = icmp sgt i32 %d, 0
%phitmp = sext i16 %h.1.i to i32
- %call1 = tail call arm_aapcs_vfpcc signext i16 bitcast (i16 (...)* @crc16 to i16 (i32)*)(i32 %phitmp)
+ %call1 = tail call arm_aapcs_vfpcc signext i16 @crc16(i32 %phitmp)
%conv2 = sext i16 %call1 to i32
br i1 %4, label %for.cond4.preheader.us.preheader, label %c.exit59
@@ -55,32 +55,32 @@
br label %for.cond4.preheader.us
for.cond4.preheader.us: ; preds = %middle.block, %for.cond4.preheader.us.preheader
- %lsr.iv7 = phi i16* [ %28, %middle.block ], [ %k, %for.cond4.preheader.us.preheader ]
+ %lsr.iv7 = phi ptr [ %28, %middle.block ], [ %k, %for.cond4.preheader.us.preheader ]
%i.064.us = phi i32 [ %inc15.us, %middle.block ], [ 0, %for.cond4.preheader.us.preheader ]
- %arrayidx12.us = getelementptr inbounds i32, i32* %e, i32 %i.064.us
- %arrayidx12.promoted.us = load i32, i32* %arrayidx12.us, align 4
+ %arrayidx12.us = getelementptr inbounds i32, ptr %e, i32 %i.064.us
+ %arrayidx12.promoted.us = load i32, ptr %arrayidx12.us, align 4
%11 = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %arrayidx12.promoted.us, i32 0
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %8)
br label %vector.body
vector.body: ; preds = %vector.body, %for.cond4.preheader.us
- %lsr.iv10 = phi i16* [ %scevgep11, %vector.body ], [ %lsr.iv7, %for.cond4.preheader.us ]
- %lsr.iv4 = phi i16* [ %scevgep5, %vector.body ], [ %l, %for.cond4.preheader.us ]
+ %lsr.iv10 = phi ptr [ %scevgep11, %vector.body ], [ %lsr.iv7, %for.cond4.preheader.us ]
+ %lsr.iv4 = phi ptr [ %scevgep5, %vector.body ], [ %l, %for.cond4.preheader.us ]
%vec.phi = phi <4 x i32> [ %11, %for.cond4.preheader.us ], [ %19, %vector.body ]
%12 = phi i32 [ %start2, %for.cond4.preheader.us ], [ %20, %vector.body ]
%13 = phi i32 [ %d, %for.cond4.preheader.us ], [ %15, %vector.body ]
- %lsr.iv1012 = bitcast i16* %lsr.iv10 to <4 x i16>*
- %lsr.iv46 = bitcast i16* %lsr.iv4 to <4 x i16>*
+ %lsr.iv1012 = bitcast ptr %lsr.iv10 to ptr
+ %lsr.iv46 = bitcast ptr %lsr.iv4 to ptr
%14 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %13)
%15 = sub i32 %13, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1012, i32 2, <4 x i1> %14, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1012, i32 2, <4 x i1> %14, <4 x i16> undef)
%16 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load76 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv46, i32 2, <4 x i1> %14, <4 x i16> undef)
+ %wide.masked.load76 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv46, i32 2, <4 x i1> %14, <4 x i16> undef)
%17 = sext <4 x i16> %wide.masked.load76 to <4 x i32>
%18 = mul nsw <4 x i32> %17, %16
%19 = add <4 x i32> %18, %vec.phi
- %scevgep5 = getelementptr i16, i16* %lsr.iv4, i32 4
- %scevgep11 = getelementptr i16, i16* %lsr.iv10, i32 4
+ %scevgep5 = getelementptr i16, ptr %lsr.iv4, i32 4
+ %scevgep11 = getelementptr i16, ptr %lsr.iv10, i32 4
%20 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %12, i32 1)
%21 = icmp ne i32 %20, 0
br i1 %21, label %vector.body, label %middle.block
@@ -89,17 +89,17 @@
%vec.phi.lcssa = phi <4 x i32> [ %vec.phi, %vector.body ]
%.lcssa = phi <4 x i32> [ %19, %vector.body ]
%22 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %10)
- %23 = bitcast i16* %lsr.iv7 to i1*
+ %23 = bitcast ptr %lsr.iv7 to ptr
%24 = select <4 x i1> %22, <4 x i32> %.lcssa, <4 x i32> %vec.phi.lcssa
%25 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %24)
%sunkaddr = mul i32 %i.064.us, 4
- %26 = bitcast i32* %e to i8*
- %sunkaddr17 = getelementptr inbounds i8, i8* %26, i32 %sunkaddr
- %27 = bitcast i8* %sunkaddr17 to i32*
- store i32 %25, i32* %27, align 4
+ %26 = bitcast ptr %e to ptr
+ %sunkaddr17 = getelementptr inbounds i8, ptr %26, i32 %sunkaddr
+ %27 = bitcast ptr %sunkaddr17 to ptr
+ store i32 %25, ptr %27, align 4
%inc15.us = add nuw nsw i32 %i.064.us, 1
- %scevgep9 = getelementptr i1, i1* %23, i32 %5
- %28 = bitcast i1* %scevgep9 to i16*
+ %scevgep9 = getelementptr i1, ptr %23, i32 %5
+ %28 = bitcast ptr %scevgep9 to ptr
%exitcond66 = icmp eq i32 %inc15.us, %d
br i1 %exitcond66, label %for.end16, label %for.cond4.preheader.us
@@ -112,11 +112,11 @@
br label %for.body.i57
for.body.i57: ; preds = %for.body.i57, %for.body.i57.preheader
- %lsr.iv1 = phi i32* [ %e, %for.body.i57.preheader ], [ %scevgep, %for.body.i57 ]
+ %lsr.iv1 = phi ptr [ %e, %for.body.i57.preheader ], [ %scevgep, %for.body.i57 ]
%h.022.i44 = phi i16 [ %h.1.i54, %for.body.i57 ], [ 0, %for.body.i57.preheader ]
%f.020.i46 = phi i32 [ %f.1.i51, %for.body.i57 ], [ undef, %for.body.i57.preheader ]
%30 = phi i32 [ %start3, %for.body.i57.preheader ], [ %32, %for.body.i57 ]
- %31 = load i32, i32* %lsr.iv1, align 4
+ %31 = load i32, ptr %lsr.iv1, align 4
%add.i48 = add nsw i32 %31, %f.020.i46
%cmp1.i49 = icmp sgt i32 %add.i48, 0
%cmp3.i50 = icmp sgt i32 %31, 0
@@ -124,7 +124,7 @@
%narrow.i52 = and i1 %cmp3.i50, %cmp1.i49
%add6.i53 = zext i1 %narrow.i52 to i16
%h.1.i54 = add i16 %h.022.i44, %add6.i53
- %scevgep = getelementptr i32, i32* %lsr.iv1, i32 1
+ %scevgep = getelementptr i32, ptr %lsr.iv1, i32 1
%32 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %30, i32 1)
%33 = icmp ne i32 %32, 0
br i1 %33, label %for.body.i57, label %c.exit59.loopexit
@@ -136,11 +136,11 @@
c.exit59: ; preds = %c.exit59.loopexit, %for.end16, %c.exit, %c.exit.thread
%conv27173 = phi i32 [ %conv2, %for.end16 ], [ %conv2, %c.exit59.loopexit ], [ %conv2, %c.exit ], [ %conv270, %c.exit.thread ]
%h.0.lcssa.i58 = phi i32 [ 0, %for.end16 ], [ %phitmp67, %c.exit59.loopexit ], [ 0, %c.exit ], [ 0, %c.exit.thread ]
- %call19 = tail call arm_aapcs_vfpcc signext i16 bitcast (i16 (...)* @crc16 to i16 (i32, i32)*)(i32 %h.0.lcssa.i58, i32 %conv27173)
+ %call19 = tail call arm_aapcs_vfpcc signext i16 @crc16(i32 %h.0.lcssa.i58, i32 %conv27173)
ret i16 %call19
}
declare dso_local arm_aapcs_vfpcc signext i16 @crc16(...) local_unnamed_addr #0
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
index af76970f18da8..0580a9725a269 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
@@ -4,7 +4,7 @@
# TODO: As far as I can tell this test is fine. Tail predicating the second loop means we remove the instruction that would otherwise block the first.
--- |
- define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) #0 {
+ define arm_aapcs_vfpcc void @arm_var_f32_mve(ptr %pSrc, i32 %blockSize, ptr nocapture %pResult) #0 {
entry:
%0 = add i32 %blockSize, 3
%1 = icmp slt i32 %blockSize, 4
@@ -23,13 +23,13 @@
do.body.i: ; preds = %do.body.i, %entry
%blkCnt.0.i = phi i32 [ %13, %do.body.i ], [ %blockSize, %entry ]
%sumVec.0.i = phi <4 x float> [ %12, %do.body.i ], [ zeroinitializer, %entry ]
- %pSrc.addr.0.i = phi float* [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ]
+ %pSrc.addr.0.i = phi ptr [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ]
%9 = phi i32 [ %start1, %entry ], [ %14, %do.body.i ]
- %pSrc.addr.0.i2 = bitcast float* %pSrc.addr.0.i to <4 x float>*
+ %pSrc.addr.0.i2 = bitcast ptr %pSrc.addr.0.i to ptr
%10 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0.i)
- %11 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.0.i2, i32 4, <4 x i1> %10, <4 x float> zeroinitializer)
+ %11 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %pSrc.addr.0.i2, i32 4, <4 x i1> %10, <4 x float> zeroinitializer)
%12 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0.i, <4 x float> %11, <4 x i1> %10, <4 x float> %sumVec.0.i)
- %add.ptr.i = getelementptr inbounds float, float* %pSrc.addr.0.i, i32 4
+ %add.ptr.i = getelementptr inbounds float, ptr %pSrc.addr.0.i, i32 4
%13 = add i32 %blkCnt.0.i, -4
%14 = call i32 @llvm.loop.decrement.reg.i32(i32 %9, i32 1)
%15 = icmp ne i32 %14, 0
@@ -50,14 +50,14 @@
do.body: ; preds = %do.body, %arm_mean_f32_mve.exit
%blkCnt.0 = phi i32 [ %blockSize, %arm_mean_f32_mve.exit ], [ %26, %do.body ]
%sumVec.0 = phi <4 x float> [ zeroinitializer, %arm_mean_f32_mve.exit ], [ %25, %do.body ]
- %pSrc.addr.0 = phi float* [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ]
+ %pSrc.addr.0 = phi ptr [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ]
%21 = phi i32 [ %start2, %arm_mean_f32_mve.exit ], [ %27, %do.body ]
- %pSrc.addr.01 = bitcast float* %pSrc.addr.0 to <4 x float>*
+ %pSrc.addr.01 = bitcast ptr %pSrc.addr.0 to ptr
%22 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
- %23 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.01, i32 4, <4 x i1> %22, <4 x float> zeroinitializer)
+ %23 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %pSrc.addr.01, i32 4, <4 x i1> %22, <4 x float> zeroinitializer)
%24 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %23, <4 x float> %20, <4 x i1> %22, <4 x float> undef)
%25 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %24, <4 x float> %24, <4 x float> %sumVec.0, <4 x i1> %22)
- %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4
+ %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4
%26 = add i32 %blkCnt.0, -4
%27 = call i32 @llvm.loop.decrement.reg.i32(i32 %21, i32 1)
%28 = icmp ne i32 %27, 0
@@ -69,7 +69,7 @@
%sub2 = add i32 %blockSize, -1
%conv = uitofp i32 %sub2 to float
%div = fdiv fast float %add2.i, %conv
- store float %div, float* %pResult, align 4
+ store float %div, ptr %pResult, align 4
ret void
}
@@ -83,7 +83,7 @@
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
; Function Attrs: argmemonly nounwind readonly willreturn
- declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2
+ declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #2
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir
index 6f7a8cde23928..ae8acddcd14b5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir
@@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc void @start_before_elems(i32* noalias nocapture %a, i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
+ define dso_local arm_aapcs_vfpcc void @start_before_elems(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
entry:
%div = lshr i32 %N, 1
%cmp9 = icmp eq i32 %div, 0
@@ -18,25 +18,25 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
+ %lsr.iv1 = bitcast ptr %lsr.iv to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %scevgep4 = getelementptr i8, i8* %b, i32 %index
- %scevgep45 = bitcast i8* %scevgep4 to <4 x i8>*
- %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef)
+ %scevgep4 = getelementptr i8, ptr %b, i32 %index
+ %scevgep45 = bitcast ptr %scevgep4 to ptr
+ %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef)
%10 = zext <4 x i8> %wide.masked.load to <4 x i32>
- %scevgep2 = getelementptr i8, i8* %c, i32 %index
- %scevgep23 = bitcast i8* %scevgep2 to <4 x i8>*
- %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef)
+ %scevgep2 = getelementptr i8, ptr %c, i32 %index
+ %scevgep23 = bitcast ptr %scevgep2 to ptr
+ %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef)
%11 = zext <4 x i8> %wide.masked.load13 to <4 x i32>
%12 = mul nuw nsw <4 x i32> %11, %10
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %12, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %8)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %12, ptr %lsr.iv1, i32 4, <4 x i1> %8)
%index.next = add i32 %index, 4
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
%13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%14 = icmp ne i32 %13, 0
br i1 %14, label %vector.body, label %for.cond.cleanup
@@ -44,8 +44,8 @@
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
- declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
index 9162d4a3f2142..bd461eb26c378 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
-define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) {
+define arm_aapcs_vfpcc void @arm_var_f32_mve(ptr %pSrc, i32 %blockSize, ptr nocapture %pResult) {
; CHECK-LABEL: arm_var_f32_mve:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -42,13 +42,13 @@ entry:
do.body.i: ; preds = %entry, %do.body.i
%blkCnt.0.i = phi i32 [ %sub.i, %do.body.i ], [ %blockSize, %entry ]
%sumVec.0.i = phi <4 x float> [ %3, %do.body.i ], [ zeroinitializer, %entry ]
- %pSrc.addr.0.i = phi float* [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ]
+ %pSrc.addr.0.i = phi ptr [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0.i)
- %1 = bitcast float* %pSrc.addr.0.i to <4 x float>*
- %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %1 = bitcast ptr %pSrc.addr.0.i to ptr
+ %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
%3 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0.i, <4 x float> %2, <4 x i1> %0, <4 x float> %sumVec.0.i)
%sub.i = add nsw i32 %blkCnt.0.i, -4
- %add.ptr.i = getelementptr inbounds float, float* %pSrc.addr.0.i, i32 4
+ %add.ptr.i = getelementptr inbounds float, ptr %pSrc.addr.0.i, i32 4
%cmp.i = icmp sgt i32 %blkCnt.0.i, 4
br i1 %cmp.i, label %do.body.i, label %arm_mean_f32_mve.exit
@@ -64,14 +64,14 @@ arm_mean_f32_mve.exit: ; preds = %do.body.i
do.body: ; preds = %do.body, %arm_mean_f32_mve.exit
%blkCnt.0 = phi i32 [ %blockSize, %arm_mean_f32_mve.exit ], [ %sub, %do.body ]
%sumVec.0 = phi <4 x float> [ zeroinitializer, %arm_mean_f32_mve.exit ], [ %9, %do.body ]
- %pSrc.addr.0 = phi float* [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ]
+ %pSrc.addr.0 = phi ptr [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ]
%5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
- %6 = bitcast float* %pSrc.addr.0 to <4 x float>*
- %7 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %6, i32 4, <4 x i1> %5, <4 x float> zeroinitializer)
+ %6 = bitcast ptr %pSrc.addr.0 to ptr
+ %7 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %6, i32 4, <4 x i1> %5, <4 x float> zeroinitializer)
%8 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %7, <4 x float> %.splat, <4 x i1> %5, <4 x float> undef)
%9 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %8, <4 x float> %8, <4 x float> %sumVec.0, <4 x i1> %5)
%sub = add nsw i32 %blkCnt.0, -4
- %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4
+ %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4
%cmp1 = icmp sgt i32 %blkCnt.0, 4
br i1 %cmp1, label %do.body, label %do.end
@@ -84,7 +84,7 @@ do.end: ; preds = %do.body
br label %cleanup
cleanup: ; preds = %entry, %do.end
- store float %div, float* %pResult, align 4
+ store float %div, ptr %pResult, align 4
ret void
}
@@ -94,7 +94,7 @@ declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x fl
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>)
declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir
index 08353c8f92f18..e9f0dbe1901f0 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir
@@ -5,7 +5,7 @@
# that the correct value is used for the dlstp.
--- |
- define dso_local arm_aapcs_vfpcc void @start_before_elems(i32* noalias nocapture %a, i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
+ define dso_local arm_aapcs_vfpcc void @start_before_elems(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
entry:
%div = lshr i32 %N, 1
%cmp9 = icmp eq i32 %div, 0
@@ -22,25 +22,25 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
+ %lsr.iv1 = bitcast ptr %lsr.iv to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %scevgep4 = getelementptr i8, i8* %b, i32 %index
- %scevgep45 = bitcast i8* %scevgep4 to <4 x i8>*
- %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef)
+ %scevgep4 = getelementptr i8, ptr %b, i32 %index
+ %scevgep45 = bitcast ptr %scevgep4 to ptr
+ %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef)
%10 = zext <4 x i8> %wide.masked.load to <4 x i32>
- %scevgep2 = getelementptr i8, i8* %c, i32 %index
- %scevgep23 = bitcast i8* %scevgep2 to <4 x i8>*
- %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef)
+ %scevgep2 = getelementptr i8, ptr %c, i32 %index
+ %scevgep23 = bitcast ptr %scevgep2 to ptr
+ %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef)
%11 = zext <4 x i8> %wide.masked.load13 to <4 x i32>
%12 = mul nuw nsw <4 x i32> %11, %10
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %12, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %8)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %12, ptr %lsr.iv1, i32 4, <4 x i1> %8)
%index.next = add i32 %index, 4
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
%13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%14 = icmp ne i32 %13, 0
br i1 %14, label %vector.body, label %for.cond.cleanup
@@ -48,8 +48,8 @@
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
- declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) #1
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
+ declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>) #1
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir
index 26b887906ea3e..fef4daf5b6bb1 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir
@@ -5,7 +5,7 @@
# that the correct value is used for the dlstp.
--- |
- define dso_local arm_aapcs_vfpcc void @start_before_elems(i32* noalias nocapture %a, i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
+ define dso_local arm_aapcs_vfpcc void @start_before_elems(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
entry:
%div = lshr i32 %N, 1
%cmp9 = icmp eq i32 %div, 0
@@ -22,25 +22,25 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
+ %lsr.iv1 = bitcast ptr %lsr.iv to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %scevgep4 = getelementptr i8, i8* %b, i32 %index
- %scevgep45 = bitcast i8* %scevgep4 to <4 x i8>*
- %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef)
+ %scevgep4 = getelementptr i8, ptr %b, i32 %index
+ %scevgep45 = bitcast ptr %scevgep4 to ptr
+ %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef)
%10 = zext <4 x i8> %wide.masked.load to <4 x i32>
- %scevgep2 = getelementptr i8, i8* %c, i32 %index
- %scevgep23 = bitcast i8* %scevgep2 to <4 x i8>*
- %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef)
+ %scevgep2 = getelementptr i8, ptr %c, i32 %index
+ %scevgep23 = bitcast ptr %scevgep2 to ptr
+ %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef)
%11 = zext <4 x i8> %wide.masked.load13 to <4 x i32>
%12 = mul nuw nsw <4 x i32> %11, %10
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %12, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %8)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %12, ptr %lsr.iv1, i32 4, <4 x i1> %8)
%index.next = add i32 %index, 4
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
%13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%14 = icmp ne i32 %13, 0
br i1 %14, label %vector.body, label %for.cond.cleanup
@@ -48,8 +48,8 @@
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
- declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) #1
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
+ declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>) #1
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-block-cond-iter-count.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-block-cond-iter-count.mir
index f17496c3d1653..6b16683ba6469 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-block-cond-iter-count.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-block-cond-iter-count.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc void @multi_cond_iter_count(i32* nocapture %0, i32* nocapture readonly %1, i32 %2, i32 %3) local_unnamed_addr #0 {
+ define dso_local arm_aapcs_vfpcc void @multi_cond_iter_count(ptr nocapture %0, ptr nocapture readonly %1, i32 %2, i32 %3) local_unnamed_addr #0 {
%5 = icmp eq i32 %3, 2
%6 = select i1 %5, i32 2, i32 4
%7 = icmp eq i32 %3, 4
@@ -12,10 +12,10 @@
br i1 %10, label %64, label %11
11: ; preds = %4
- %12 = getelementptr i32, i32* %0, i32 %9
- %13 = getelementptr i32, i32* %1, i32 %9
- %14 = icmp ugt i32* %13, %0
- %15 = icmp ugt i32* %12, %1
+ %12 = getelementptr i32, ptr %0, i32 %9
+ %13 = getelementptr i32, ptr %1, i32 %9
+ %14 = icmp ugt ptr %13, %0
+ %15 = icmp ugt ptr %12, %1
%16 = and i1 %14, %15
%17 = add i32 %9, 3
%18 = lshr i32 %17, 2
@@ -44,20 +44,20 @@
br label %33
33: ; preds = %33, %32
- %34 = phi i32* [ %46, %33 ], [ %0, %32 ]
- %35 = phi i32* [ %45, %33 ], [ %1, %32 ]
+ %34 = phi ptr [ %46, %33 ], [ %0, %32 ]
+ %35 = phi ptr [ %45, %33 ], [ %1, %32 ]
%36 = phi i32 [ %start2, %32 ], [ %47, %33 ]
%37 = phi i32 [ %9, %32 ], [ %41, %33 ]
- %38 = bitcast i32* %34 to <4 x i32>*
- %39 = bitcast i32* %35 to <4 x i32>*
+ %38 = bitcast ptr %34 to ptr
+ %39 = bitcast ptr %35 to ptr
%40 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %37)
%41 = sub i32 %37, 4
- %42 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %39, i32 4, <4 x i1> %40, <4 x i32> undef)
- %43 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %38, i32 4, <4 x i1> %40, <4 x i32> undef)
+ %42 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %39, i32 4, <4 x i1> %40, <4 x i32> undef)
+ %43 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %38, i32 4, <4 x i1> %40, <4 x i32> undef)
%44 = mul nsw <4 x i32> %43, %42
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %44, <4 x i32>* %38, i32 4, <4 x i1> %40)
- %45 = getelementptr i32, i32* %35, i32 4
- %46 = getelementptr i32, i32* %34, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %44, ptr %38, i32 4, <4 x i1> %40)
+ %45 = getelementptr i32, ptr %35, i32 4
+ %46 = getelementptr i32, ptr %34, i32 4
%47 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %36, i32 1)
%48 = icmp ne i32 %47, 0
br i1 %48, label %33, label %64
@@ -68,19 +68,19 @@
br i1 %51, label %64, label %52
52: ; preds = %49
- %53 = getelementptr inbounds i32, i32* %1, i32 %50
- %54 = load i32, i32* %53, align 4
- %55 = getelementptr inbounds i32, i32* %0, i32 %50
- %56 = load i32, i32* %55, align 4
+ %53 = getelementptr inbounds i32, ptr %1, i32 %50
+ %54 = load i32, ptr %53, align 4
+ %55 = getelementptr inbounds i32, ptr %0, i32 %50
+ %56 = load i32, ptr %55, align 4
%57 = mul nsw i32 %56, %54
- store i32 %57, i32* %55, align 4
+ store i32 %57, ptr %55, align 4
%58 = add nuw i32 %50, 1
- %59 = getelementptr inbounds i32, i32* %1, i32 %58
- %60 = load i32, i32* %59, align 4
- %61 = getelementptr inbounds i32, i32* %0, i32 %58
- %62 = load i32, i32* %61, align 4
+ %59 = getelementptr inbounds i32, ptr %1, i32 %58
+ %60 = load i32, ptr %59, align 4
+ %61 = getelementptr inbounds i32, ptr %0, i32 %58
+ %62 = load i32, ptr %61, align 4
%63 = mul nsw i32 %62, %60
- store i32 %63, i32* %61, align 4
+ store i32 %63, ptr %61, align 4
br label %64
64: ; preds = %33, %52, %49, %4
@@ -90,48 +90,48 @@
%66 = phi i32 [ %108, %65 ], [ 0, %31 ]
%67 = phi i32 [ 0, %31 ], [ %107, %65 ]
%68 = phi i32 [ %start1, %31 ], [ %109, %65 ]
- %69 = bitcast i32* %0 to i8*
- %70 = bitcast i32* %1 to i8*
- %71 = getelementptr i8, i8* %70, i32 %66
- %72 = bitcast i8* %71 to i32*
- %73 = bitcast i32* %72 to i32*
- %74 = load i32, i32* %73, align 4
- %75 = getelementptr i8, i8* %69, i32 %66
- %76 = bitcast i8* %75 to i32*
- %77 = bitcast i32* %76 to i32*
- %78 = load i32, i32* %77, align 4
+ %69 = bitcast ptr %0 to ptr
+ %70 = bitcast ptr %1 to ptr
+ %71 = getelementptr i8, ptr %70, i32 %66
+ %72 = bitcast ptr %71 to ptr
+ %73 = bitcast ptr %72 to ptr
+ %74 = load i32, ptr %73, align 4
+ %75 = getelementptr i8, ptr %69, i32 %66
+ %76 = bitcast ptr %75 to ptr
+ %77 = bitcast ptr %76 to ptr
+ %78 = load i32, ptr %77, align 4
%79 = mul nsw i32 %78, %74
- store i32 %79, i32* %77, align 4
- %80 = getelementptr i8, i8* %70, i32 %66
- %81 = bitcast i8* %80 to i32*
- %82 = getelementptr i32, i32* %81, i32 1
- %83 = load i32, i32* %82, align 4
- %84 = getelementptr i8, i8* %69, i32 %66
- %85 = bitcast i8* %84 to i32*
- %86 = getelementptr i32, i32* %85, i32 1
- %87 = load i32, i32* %86, align 4
+ store i32 %79, ptr %77, align 4
+ %80 = getelementptr i8, ptr %70, i32 %66
+ %81 = bitcast ptr %80 to ptr
+ %82 = getelementptr i32, ptr %81, i32 1
+ %83 = load i32, ptr %82, align 4
+ %84 = getelementptr i8, ptr %69, i32 %66
+ %85 = bitcast ptr %84 to ptr
+ %86 = getelementptr i32, ptr %85, i32 1
+ %87 = load i32, ptr %86, align 4
%88 = mul nsw i32 %87, %83
- store i32 %88, i32* %86, align 4
- %89 = getelementptr i8, i8* %70, i32 %66
- %90 = bitcast i8* %89 to i32*
- %91 = getelementptr i32, i32* %90, i32 2
- %92 = load i32, i32* %91, align 4
- %93 = getelementptr i8, i8* %69, i32 %66
- %94 = bitcast i8* %93 to i32*
- %95 = getelementptr i32, i32* %94, i32 2
- %96 = load i32, i32* %95, align 4
+ store i32 %88, ptr %86, align 4
+ %89 = getelementptr i8, ptr %70, i32 %66
+ %90 = bitcast ptr %89 to ptr
+ %91 = getelementptr i32, ptr %90, i32 2
+ %92 = load i32, ptr %91, align 4
+ %93 = getelementptr i8, ptr %69, i32 %66
+ %94 = bitcast ptr %93 to ptr
+ %95 = getelementptr i32, ptr %94, i32 2
+ %96 = load i32, ptr %95, align 4
%97 = mul nsw i32 %96, %92
- store i32 %97, i32* %95, align 4
- %98 = getelementptr i8, i8* %70, i32 %66
- %99 = bitcast i8* %98 to i32*
- %100 = getelementptr i32, i32* %99, i32 3
- %101 = load i32, i32* %100, align 4
- %102 = getelementptr i8, i8* %69, i32 %66
- %103 = bitcast i8* %102 to i32*
- %104 = getelementptr i32, i32* %103, i32 3
- %105 = load i32, i32* %104, align 4
+ store i32 %97, ptr %95, align 4
+ %98 = getelementptr i8, ptr %70, i32 %66
+ %99 = bitcast ptr %98 to ptr
+ %100 = getelementptr i32, ptr %99, i32 3
+ %101 = load i32, ptr %100, align 4
+ %102 = getelementptr i8, ptr %69, i32 %66
+ %103 = bitcast ptr %102 to ptr
+ %104 = getelementptr i32, ptr %103, i32 3
+ %105 = load i32, ptr %104, align 4
%106 = mul nsw i32 %105, %101
- store i32 %106, i32* %104, align 4
+ store i32 %106, ptr %104, align 4
%107 = add nuw i32 %67, 4
%108 = add i32 %66, 16
%109 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %68, i32 1)
@@ -139,8 +139,8 @@
br i1 %110, label %65, label %49
}
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #1
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #1
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir
index 5ce9a63025d04..a530c1ec55573 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc void @multi_cond_iter_count(i32* noalias nocapture %0, i32* nocapture readonly %1, i32 %2, i32 %3) {
+ define dso_local arm_aapcs_vfpcc void @multi_cond_iter_count(ptr noalias nocapture %0, ptr nocapture readonly %1, i32 %2, i32 %3) {
%5 = icmp eq i32 %3, 2
%6 = select i1 %5, i32 2, i32 4
%7 = icmp eq i32 %3, 4
@@ -22,20 +22,20 @@
br label %18
18: ; preds = %18, %17
- %19 = phi i32* [ %31, %18 ], [ %0, %17 ]
- %20 = phi i32* [ %30, %18 ], [ %1, %17 ]
+ %19 = phi ptr [ %31, %18 ], [ %0, %17 ]
+ %20 = phi ptr [ %30, %18 ], [ %1, %17 ]
%21 = phi i32 [ %start, %17 ], [ %32, %18 ]
%22 = phi i32 [ %9, %17 ], [ %26, %18 ]
- %23 = bitcast i32* %19 to <4 x i32>*
- %24 = bitcast i32* %20 to <4 x i32>*
+ %23 = bitcast ptr %19 to ptr
+ %24 = bitcast ptr %20 to ptr
%25 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %22)
%26 = sub i32 %22, 4
- %27 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %24, i32 4, <4 x i1> %25, <4 x i32> undef)
- %28 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %23, i32 4, <4 x i1> %25, <4 x i32> undef)
+ %27 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %24, i32 4, <4 x i1> %25, <4 x i32> undef)
+ %28 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %23, i32 4, <4 x i1> %25, <4 x i32> undef)
%29 = mul nsw <4 x i32> %28, %27
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %29, <4 x i32>* %23, i32 4, <4 x i1> %25)
- %30 = getelementptr i32, i32* %20, i32 4
- %31 = getelementptr i32, i32* %19, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %29, ptr %23, i32 4, <4 x i1> %25)
+ %30 = getelementptr i32, ptr %20, i32 4
+ %31 = getelementptr i32, ptr %19, i32 4
%32 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %21, i32 1)
%33 = icmp ne i32 %32, 0
br i1 %33, label %18, label %34
@@ -43,8 +43,8 @@
34: ; preds = %18, %4
ret void
}
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir
index 6e8ad0877f1c2..c2f8cc04bffea 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc void @test1(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr {
+ define dso_local arm_aapcs_vfpcc void @test1(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr {
entry:
%cmp30 = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -18,23 +18,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv68 = phi i32* [ %scevgep69, %vector.body ], [ %a, %vector.ph ]
- %lsr.iv65 = phi i32* [ %scevgep66, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv62 = phi i32* [ %scevgep63, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv68 = phi ptr [ %scevgep69, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv65 = phi ptr [ %scevgep66, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv62 = phi ptr [ %scevgep63, %vector.body ], [ %b, %vector.ph ]
%6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv6870 = bitcast i32* %lsr.iv68 to <4 x i32>*
- %lsr.iv6567 = bitcast i32* %lsr.iv65 to <4 x i32>*
- %lsr.iv6264 = bitcast i32* %lsr.iv62 to <4 x i32>*
+ %lsr.iv6870 = bitcast ptr %lsr.iv68 to ptr
+ %lsr.iv6567 = bitcast ptr %lsr.iv65 to ptr
+ %lsr.iv6264 = bitcast ptr %lsr.iv62 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv6264, i32 4, <4 x i1> %8, <4 x i32> undef)
- %wide.masked.load35 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv6567, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv6264, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load35 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv6567, i32 4, <4 x i1> %8, <4 x i32> undef)
%10 = mul nsw <4 x i32> %wide.masked.load35, %wide.masked.load
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv6870, i32 4, <4 x i1> %8)
- %scevgep63 = getelementptr i32, i32* %lsr.iv62, i32 4
- %scevgep66 = getelementptr i32, i32* %lsr.iv65, i32 4
- %scevgep69 = getelementptr i32, i32* %lsr.iv68, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv6870, i32 4, <4 x i1> %8)
+ %scevgep63 = getelementptr i32, ptr %lsr.iv62, i32 4
+ %scevgep66 = getelementptr i32, ptr %lsr.iv65, i32 4
+ %scevgep69 = getelementptr i32, ptr %lsr.iv68, i32 4
%11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %for.cond4.preheader
@@ -54,25 +54,25 @@
br label %vector.body38
vector.body38: ; preds = %vector.body38, %vector.ph39
- %lsr.iv59 = phi i32* [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ]
- %lsr.iv56 = phi i32* [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ]
- %lsr.iv = phi i32* [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ]
+ %lsr.iv59 = phi ptr [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ]
+ %lsr.iv56 = phi ptr [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ]
%20 = phi i32 [ %start2, %vector.ph39 ], [ %26, %vector.body38 ]
%21 = phi i32 [ %N, %vector.ph39 ], [ %23, %vector.body38 ]
- %lsr.iv5961 = bitcast i32* %lsr.iv59 to <4 x i32>*
- %lsr.iv5658 = bitcast i32* %lsr.iv56 to <4 x i32>*
- %lsr.iv55 = bitcast i32* %lsr.iv to <4 x i32>*
+ %lsr.iv5961 = bitcast ptr %lsr.iv59 to ptr
+ %lsr.iv5658 = bitcast ptr %lsr.iv56 to ptr
+ %lsr.iv55 = bitcast ptr %lsr.iv to ptr
%22 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %21)
%23 = sub i32 %21, 4
- %wide.masked.load52 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv55, i32 4, <4 x i1> %22, <4 x i32> undef)
- %wide.masked.load53 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv5658, i32 4, <4 x i1> %22, <4 x i32> undef)
+ %wide.masked.load52 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv55, i32 4, <4 x i1> %22, <4 x i32> undef)
+ %wide.masked.load53 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv5658, i32 4, <4 x i1> %22, <4 x i32> undef)
%24 = xor <4 x i32> %wide.masked.load53, %wide.masked.load52
- %wide.masked.load54 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv5961, i32 4, <4 x i1> %22, <4 x i32> undef)
+ %wide.masked.load54 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv5961, i32 4, <4 x i1> %22, <4 x i32> undef)
%25 = add nsw <4 x i32> %wide.masked.load54, %24
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %25, <4 x i32>* %lsr.iv5961, i32 4, <4 x i1> %22)
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep57 = getelementptr i32, i32* %lsr.iv56, i32 4
- %scevgep60 = getelementptr i32, i32* %lsr.iv59, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %25, ptr %lsr.iv5961, i32 4, <4 x i1> %22)
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep57 = getelementptr i32, ptr %lsr.iv56, i32 4
+ %scevgep60 = getelementptr i32, ptr %lsr.iv59, i32 4
%26 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %20, i32 1)
%27 = icmp ne i32 %26, 0
br i1 %27, label %vector.body38, label %for.cond.cleanup6
@@ -81,7 +81,7 @@
ret void
}
; Function Attrs: nofree norecurse nounwind
- define dso_local arm_aapcs_vfpcc void @test2(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr {
+ define dso_local arm_aapcs_vfpcc void @test2(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr {
entry:
%div = lshr i32 %N, 1
%cmp30 = icmp eq i32 %div, 0
@@ -98,23 +98,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv68 = phi i32* [ %scevgep69, %vector.body ], [ %a, %vector.ph ]
- %lsr.iv65 = phi i32* [ %scevgep66, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv62 = phi i32* [ %scevgep63, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv68 = phi ptr [ %scevgep69, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv65 = phi ptr [ %scevgep66, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv62 = phi ptr [ %scevgep63, %vector.body ], [ %b, %vector.ph ]
%6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv6870 = bitcast i32* %lsr.iv68 to <4 x i32>*
- %lsr.iv6567 = bitcast i32* %lsr.iv65 to <4 x i32>*
- %lsr.iv6264 = bitcast i32* %lsr.iv62 to <4 x i32>*
+ %lsr.iv6870 = bitcast ptr %lsr.iv68 to ptr
+ %lsr.iv6567 = bitcast ptr %lsr.iv65 to ptr
+ %lsr.iv6264 = bitcast ptr %lsr.iv62 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv6264, i32 4, <4 x i1> %8, <4 x i32> undef)
- %wide.masked.load35 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv6567, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv6264, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load35 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv6567, i32 4, <4 x i1> %8, <4 x i32> undef)
%10 = mul nsw <4 x i32> %wide.masked.load35, %wide.masked.load
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv6870, i32 4, <4 x i1> %8)
- %scevgep63 = getelementptr i32, i32* %lsr.iv62, i32 4
- %scevgep66 = getelementptr i32, i32* %lsr.iv65, i32 4
- %scevgep69 = getelementptr i32, i32* %lsr.iv68, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv6870, i32 4, <4 x i1> %8)
+ %scevgep63 = getelementptr i32, ptr %lsr.iv62, i32 4
+ %scevgep66 = getelementptr i32, ptr %lsr.iv65, i32 4
+ %scevgep69 = getelementptr i32, ptr %lsr.iv68, i32 4
%11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %for.cond4.preheader
@@ -134,25 +134,25 @@
br label %vector.body38
vector.body38: ; preds = %vector.body38, %vector.ph39
- %lsr.iv59 = phi i32* [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ]
- %lsr.iv56 = phi i32* [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ]
- %lsr.iv = phi i32* [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ]
+ %lsr.iv59 = phi ptr [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ]
+ %lsr.iv56 = phi ptr [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ]
%19 = phi i32 [ %start2, %vector.ph39 ], [ %25, %vector.body38 ]
%20 = phi i32 [ %N, %vector.ph39 ], [ %22, %vector.body38 ]
- %lsr.iv5961 = bitcast i32* %lsr.iv59 to <4 x i32>*
- %lsr.iv5658 = bitcast i32* %lsr.iv56 to <4 x i32>*
- %lsr.iv55 = bitcast i32* %lsr.iv to <4 x i32>*
+ %lsr.iv5961 = bitcast ptr %lsr.iv59 to ptr
+ %lsr.iv5658 = bitcast ptr %lsr.iv56 to ptr
+ %lsr.iv55 = bitcast ptr %lsr.iv to ptr
%21 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %20)
%22 = sub i32 %20, 4
- %wide.masked.load52 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv55, i32 4, <4 x i1> %21, <4 x i32> undef)
- %wide.masked.load53 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv5658, i32 4, <4 x i1> %21, <4 x i32> undef)
+ %wide.masked.load52 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv55, i32 4, <4 x i1> %21, <4 x i32> undef)
+ %wide.masked.load53 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv5658, i32 4, <4 x i1> %21, <4 x i32> undef)
%23 = xor <4 x i32> %wide.masked.load53, %wide.masked.load52
- %wide.masked.load54 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv5961, i32 4, <4 x i1> %21, <4 x i32> undef)
+ %wide.masked.load54 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv5961, i32 4, <4 x i1> %21, <4 x i32> undef)
%24 = add nsw <4 x i32> %wide.masked.load54, %23
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %24, <4 x i32>* %lsr.iv5961, i32 4, <4 x i1> %21)
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep57 = getelementptr i32, i32* %lsr.iv56, i32 4
- %scevgep60 = getelementptr i32, i32* %lsr.iv59, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %24, ptr %lsr.iv5961, i32 4, <4 x i1> %21)
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep57 = getelementptr i32, ptr %lsr.iv56, i32 4
+ %scevgep60 = getelementptr i32, ptr %lsr.iv59, i32 4
%25 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %19, i32 1)
%26 = icmp ne i32 %25, 0
br i1 %26, label %vector.body38, label %for.cond.cleanup6
@@ -161,7 +161,7 @@
ret void
}
; Function Attrs: nofree norecurse nounwind
- define dso_local arm_aapcs_vfpcc void @test3(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr {
+ define dso_local arm_aapcs_vfpcc void @test3(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr {
entry:
%cmp54 = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -177,23 +177,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv123 = phi i32* [ %scevgep124, %vector.body ], [ %a, %vector.ph ]
- %lsr.iv120 = phi i32* [ %scevgep121, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv117 = phi i32* [ %scevgep118, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv123 = phi ptr [ %scevgep124, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv120 = phi ptr [ %scevgep121, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv117 = phi ptr [ %scevgep118, %vector.body ], [ %b, %vector.ph ]
%6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv123125 = bitcast i32* %lsr.iv123 to <4 x i32>*
- %lsr.iv120122 = bitcast i32* %lsr.iv120 to <4 x i32>*
- %lsr.iv117119 = bitcast i32* %lsr.iv117 to <4 x i32>*
+ %lsr.iv123125 = bitcast ptr %lsr.iv123 to ptr
+ %lsr.iv120122 = bitcast ptr %lsr.iv120 to ptr
+ %lsr.iv117119 = bitcast ptr %lsr.iv117 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv117119, i32 4, <4 x i1> %8, <4 x i32> undef)
- %wide.masked.load62 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv120122, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv117119, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load62 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv120122, i32 4, <4 x i1> %8, <4 x i32> undef)
%10 = mul nsw <4 x i32> %wide.masked.load62, %wide.masked.load
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv123125, i32 4, <4 x i1> %8)
- %scevgep118 = getelementptr i32, i32* %lsr.iv117, i32 4
- %scevgep121 = getelementptr i32, i32* %lsr.iv120, i32 4
- %scevgep124 = getelementptr i32, i32* %lsr.iv123, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv123125, i32 4, <4 x i1> %8)
+ %scevgep118 = getelementptr i32, ptr %lsr.iv117, i32 4
+ %scevgep121 = getelementptr i32, ptr %lsr.iv120, i32 4
+ %scevgep124 = getelementptr i32, ptr %lsr.iv123, i32 4
%11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %for.cond4.preheader
@@ -214,25 +214,25 @@
br label %vector.body65
vector.body65: ; preds = %vector.body65, %vector.ph66
- %lsr.iv114 = phi i32* [ %scevgep115, %vector.body65 ], [ %a, %vector.ph66 ]
- %lsr.iv111 = phi i32* [ %scevgep112, %vector.body65 ], [ %c, %vector.ph66 ]
- %lsr.iv108 = phi i32* [ %scevgep109, %vector.body65 ], [ %b, %vector.ph66 ]
+ %lsr.iv114 = phi ptr [ %scevgep115, %vector.body65 ], [ %a, %vector.ph66 ]
+ %lsr.iv111 = phi ptr [ %scevgep112, %vector.body65 ], [ %c, %vector.ph66 ]
+ %lsr.iv108 = phi ptr [ %scevgep109, %vector.body65 ], [ %b, %vector.ph66 ]
%19 = phi i32 [ %start2, %vector.ph66 ], [ %25, %vector.body65 ]
%20 = phi i32 [ %div, %vector.ph66 ], [ %22, %vector.body65 ]
- %lsr.iv114116 = bitcast i32* %lsr.iv114 to <4 x i32>*
- %lsr.iv111113 = bitcast i32* %lsr.iv111 to <4 x i32>*
- %lsr.iv108110 = bitcast i32* %lsr.iv108 to <4 x i32>*
+ %lsr.iv114116 = bitcast ptr %lsr.iv114 to ptr
+ %lsr.iv111113 = bitcast ptr %lsr.iv111 to ptr
+ %lsr.iv108110 = bitcast ptr %lsr.iv108 to ptr
%21 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %20)
%22 = sub i32 %20, 4
- %wide.masked.load79 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv108110, i32 4, <4 x i1> %21, <4 x i32> undef)
- %wide.masked.load80 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv111113, i32 4, <4 x i1> %21, <4 x i32> undef)
+ %wide.masked.load79 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv108110, i32 4, <4 x i1> %21, <4 x i32> undef)
+ %wide.masked.load80 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv111113, i32 4, <4 x i1> %21, <4 x i32> undef)
%23 = xor <4 x i32> %wide.masked.load80, %wide.masked.load79
- %wide.masked.load81 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv114116, i32 4, <4 x i1> %21, <4 x i32> undef)
+ %wide.masked.load81 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv114116, i32 4, <4 x i1> %21, <4 x i32> undef)
%24 = add nsw <4 x i32> %wide.masked.load81, %23
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %24, <4 x i32>* %lsr.iv114116, i32 4, <4 x i1> %21)
- %scevgep109 = getelementptr i32, i32* %lsr.iv108, i32 4
- %scevgep112 = getelementptr i32, i32* %lsr.iv111, i32 4
- %scevgep115 = getelementptr i32, i32* %lsr.iv114, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %24, ptr %lsr.iv114116, i32 4, <4 x i1> %21)
+ %scevgep109 = getelementptr i32, ptr %lsr.iv108, i32 4
+ %scevgep112 = getelementptr i32, ptr %lsr.iv111, i32 4
+ %scevgep115 = getelementptr i32, ptr %lsr.iv114, i32 4
%25 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %19, i32 1)
%26 = icmp ne i32 %25, 0
br i1 %26, label %vector.body65, label %for.cond15.preheader
@@ -252,25 +252,25 @@
br label %vector.body84
vector.body84: ; preds = %vector.body84, %vector.ph85
- %lsr.iv105 = phi i32* [ %scevgep106, %vector.body84 ], [ %a, %vector.ph85 ]
- %lsr.iv102 = phi i32* [ %scevgep103, %vector.body84 ], [ %c, %vector.ph85 ]
- %lsr.iv = phi i32* [ %scevgep, %vector.body84 ], [ %b, %vector.ph85 ]
+ %lsr.iv105 = phi ptr [ %scevgep106, %vector.body84 ], [ %a, %vector.ph85 ]
+ %lsr.iv102 = phi ptr [ %scevgep103, %vector.body84 ], [ %c, %vector.ph85 ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body84 ], [ %b, %vector.ph85 ]
%34 = phi i32 [ %start3, %vector.ph85 ], [ %40, %vector.body84 ]
%35 = phi i32 [ %N, %vector.ph85 ], [ %37, %vector.body84 ]
- %lsr.iv105107 = bitcast i32* %lsr.iv105 to <4 x i32>*
- %lsr.iv102104 = bitcast i32* %lsr.iv102 to <4 x i32>*
- %lsr.iv101 = bitcast i32* %lsr.iv to <4 x i32>*
+ %lsr.iv105107 = bitcast ptr %lsr.iv105 to ptr
+ %lsr.iv102104 = bitcast ptr %lsr.iv102 to ptr
+ %lsr.iv101 = bitcast ptr %lsr.iv to ptr
%36 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %35)
%37 = sub i32 %35, 4
- %wide.masked.load98 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv101, i32 4, <4 x i1> %36, <4 x i32> undef)
- %wide.masked.load99 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv102104, i32 4, <4 x i1> %36, <4 x i32> undef)
- %wide.masked.load100 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv105107, i32 4, <4 x i1> %36, <4 x i32> undef)
+ %wide.masked.load98 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv101, i32 4, <4 x i1> %36, <4 x i32> undef)
+ %wide.masked.load99 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv102104, i32 4, <4 x i1> %36, <4 x i32> undef)
+ %wide.masked.load100 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv105107, i32 4, <4 x i1> %36, <4 x i32> undef)
%38 = add <4 x i32> %wide.masked.load99, %wide.masked.load98
%39 = sub <4 x i32> %wide.masked.load100, %38
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %39, <4 x i32>* %lsr.iv105107, i32 4, <4 x i1> %36)
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep103 = getelementptr i32, i32* %lsr.iv102, i32 4
- %scevgep106 = getelementptr i32, i32* %lsr.iv105, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %39, ptr %lsr.iv105107, i32 4, <4 x i1> %36)
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep103 = getelementptr i32, ptr %lsr.iv102, i32 4
+ %scevgep106 = getelementptr i32, ptr %lsr.iv105, i32 4
%40 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %34, i32 1)
%41 = icmp ne i32 %40, 0
br i1 %41, label %vector.body84, label %for.cond.cleanup17
@@ -278,8 +278,8 @@
for.cond.cleanup17: ; preds = %vector.body84, %entry, %for.cond15.preheader
ret void
}
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
index cfa6eb3aca820..715f6565199ec 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s
-define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture readonly %b, i32 %N) {
+define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, ptr nocapture readonly %b, i32 %N) {
; CHECK-LABEL: test_acc_scalar_char:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -45,10 +45,10 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ]
- %0 = getelementptr inbounds i8, i8* %b, i32 %index
+ %0 = getelementptr inbounds i8, ptr %b, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i8* %0 to <4 x i8>*
- %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %2, i32 1, <4 x i1> %1, <4 x i8> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %2, i32 1, <4 x i1> %1, <4 x i8> undef)
%3 = zext <4 x i8> %wide.masked.load to <4 x i32>
%4 = mul nuw nsw <4 x i32> %broadcast.splat13, %3
%5 = add nuw nsw <4 x i32> %4, %vec.phi
@@ -66,7 +66,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i32 %res.0.lcssa
}
-define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture readonly %b, i32 %N) {
+define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, ptr nocapture readonly %b, i32 %N) {
; CHECK-LABEL: test_acc_scalar_short:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -110,10 +110,10 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ]
- %0 = getelementptr inbounds i16, i16* %b, i32 %index
+ %0 = getelementptr inbounds i16, ptr %b, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i16* %0 to <4 x i16>*
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %2, i32 2, <4 x i1> %1, <4 x i16> undef)
%3 = sext <4 x i16> %wide.masked.load to <4 x i32>
%4 = mul nsw <4 x i32> %broadcast.splat13, %3
%5 = add nsw <4 x i32> %4, %vec.phi
@@ -131,7 +131,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i32 %res.0.lcssa
}
-define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture readonly %b, i32 %N) {
+define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, ptr nocapture readonly %b, i32 %N) {
; CHECK-LABEL: test_acc_scalar_uchar:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -175,10 +175,10 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ]
- %0 = getelementptr inbounds i8, i8* %b, i32 %index
+ %0 = getelementptr inbounds i8, ptr %b, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i8* %0 to <4 x i8>*
- %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %2, i32 1, <4 x i1> %1, <4 x i8> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %2, i32 1, <4 x i1> %1, <4 x i8> undef)
%3 = zext <4 x i8> %wide.masked.load to <4 x i32>
%4 = mul nuw nsw <4 x i32> %broadcast.splat13, %3
%5 = add nuw nsw <4 x i32> %4, %vec.phi
@@ -196,7 +196,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i32 %res.0.lcssa
}
-define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocapture readonly %b, i32 %N) {
+define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, ptr nocapture readonly %b, i32 %N) {
; CHECK-LABEL: test_acc_scalar_ushort:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -240,10 +240,10 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ]
- %0 = getelementptr inbounds i16, i16* %b, i32 %index
+ %0 = getelementptr inbounds i16, ptr %b, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i16* %0 to <4 x i16>*
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %2, i32 2, <4 x i1> %1, <4 x i16> undef)
%3 = zext <4 x i16> %wide.masked.load to <4 x i32>
%4 = mul nsw <4 x i32> %broadcast.splat13, %3
%5 = add nsw <4 x i32> %4, %vec.phi
@@ -261,7 +261,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i32 %res.0.lcssa
}
-define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly %b, i32 %N) {
+define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, ptr nocapture readonly %b, i32 %N) {
; CHECK-LABEL: test_acc_scalar_int:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -304,10 +304,10 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %4, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %b, i32 %index
+ %0 = getelementptr inbounds i32, ptr %b, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef)
%3 = mul nsw <4 x i32> %wide.masked.load, %broadcast.splat12
%4 = add nsw <4 x i32> %3, %vec.phi
%index.next = add i32 %index, 4
@@ -324,7 +324,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i32 %res.0.lcssa
}
-define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly %a, i8* nocapture readonly %b, i8 zeroext %c, i32* nocapture %res, i32 %N) {
+define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(ptr nocapture readonly %a, ptr nocapture readonly %b, i8 zeroext %c, ptr nocapture %res, i32 %N) {
; CHECK-LABEL: test_vec_mul_scalar_add_char:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
@@ -411,21 +411,21 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
; CHECK-NEXT: .LBB5_11: @ %for.cond.cleanup
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
entry:
- %res12 = bitcast i32* %res to i8*
+ %res12 = bitcast ptr %res to ptr
%cmp10 = icmp eq i32 %N, 0
br i1 %cmp10, label %for.cond.cleanup, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
%conv3 = zext i8 %c to i32
- %scevgep = getelementptr i32, i32* %res, i32 %N
- %scevgep13 = bitcast i32* %scevgep to i8*
- %scevgep14 = getelementptr i8, i8* %a, i32 %N
- %scevgep15 = getelementptr i8, i8* %b, i32 %N
- %bound0 = icmp ugt i8* %scevgep14, %res12
- %bound1 = icmp ugt i8* %scevgep13, %a
+ %scevgep = getelementptr i32, ptr %res, i32 %N
+ %scevgep13 = bitcast ptr %scevgep to ptr
+ %scevgep14 = getelementptr i8, ptr %a, i32 %N
+ %scevgep15 = getelementptr i8, ptr %b, i32 %N
+ %bound0 = icmp ugt ptr %scevgep14, %res12
+ %bound1 = icmp ugt ptr %scevgep13, %a
%found.conflict = and i1 %bound0, %bound1
- %bound016 = icmp ugt i8* %scevgep15, %res12
- %bound117 = icmp ugt i8* %scevgep13, %b
+ %bound016 = icmp ugt ptr %scevgep15, %res12
+ %bound117 = icmp ugt ptr %scevgep13, %b
%found.conflict18 = and i1 %bound016, %bound117
%conflict.rdx = or i1 %found.conflict, %found.conflict18
br i1 %conflict.rdx, label %for.body.preheader, label %vector.ph
@@ -449,20 +449,20 @@ vector.ph: ; preds = %for.body.lr.ph
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %2 = getelementptr inbounds i8, i8* %a, i32 %index
+ %2 = getelementptr inbounds i8, ptr %a, i32 %index
%3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %4 = bitcast i8* %2 to <4 x i8>*
- %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %4, i32 1, <4 x i1> %3, <4 x i8> undef)
+ %4 = bitcast ptr %2 to ptr
+ %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %4, i32 1, <4 x i1> %3, <4 x i8> undef)
%5 = zext <4 x i8> %wide.masked.load to <4 x i32>
- %6 = getelementptr inbounds i8, i8* %b, i32 %index
- %7 = bitcast i8* %6 to <4 x i8>*
- %wide.masked.load21 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %7, i32 1, <4 x i1> %3, <4 x i8> undef)
+ %6 = getelementptr inbounds i8, ptr %b, i32 %index
+ %7 = bitcast ptr %6 to ptr
+ %wide.masked.load21 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %7, i32 1, <4 x i1> %3, <4 x i8> undef)
%8 = zext <4 x i8> %wide.masked.load21 to <4 x i32>
%9 = mul nuw nsw <4 x i32> %8, %5
%10 = add nuw nsw <4 x i32> %9, %broadcast.splat23
- %11 = getelementptr inbounds i32, i32* %res, i32 %index
- %12 = bitcast i32* %11 to <4 x i32>*
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %12, i32 4, <4 x i1> %3)
+ %11 = getelementptr inbounds i32, ptr %res, i32 %index
+ %12 = bitcast ptr %11 to ptr
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %12, i32 4, <4 x i1> %3)
%index.next = add i32 %index, 4
%13 = icmp eq i32 %index.next, %n.vec
br i1 %13, label %for.cond.cleanup, label %vector.body
@@ -475,16 +475,16 @@ for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.body, %for.body
for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.body.epil
%i.011.epil = phi i32 [ %inc.epil, %for.body.epil ], [ %i.011.unr, %for.cond.cleanup.loopexit.unr-lcssa ]
%epil.iter = phi i32 [ %epil.iter.sub, %for.body.epil ], [ %xtraiter, %for.cond.cleanup.loopexit.unr-lcssa ]
- %arrayidx.epil = getelementptr inbounds i8, i8* %a, i32 %i.011.epil
- %14 = load i8, i8* %arrayidx.epil, align 1
+ %arrayidx.epil = getelementptr inbounds i8, ptr %a, i32 %i.011.epil
+ %14 = load i8, ptr %arrayidx.epil, align 1
%conv.epil = zext i8 %14 to i32
- %arrayidx1.epil = getelementptr inbounds i8, i8* %b, i32 %i.011.epil
- %15 = load i8, i8* %arrayidx1.epil, align 1
+ %arrayidx1.epil = getelementptr inbounds i8, ptr %b, i32 %i.011.epil
+ %15 = load i8, ptr %arrayidx1.epil, align 1
%conv2.epil = zext i8 %15 to i32
%mul.epil = mul nuw nsw i32 %conv2.epil, %conv.epil
%add.epil = add nuw nsw i32 %mul.epil, %conv3
- %arrayidx4.epil = getelementptr inbounds i32, i32* %res, i32 %i.011.epil
- store i32 %add.epil, i32* %arrayidx4.epil, align 4
+ %arrayidx4.epil = getelementptr inbounds i32, ptr %res, i32 %i.011.epil
+ store i32 %add.epil, ptr %arrayidx4.epil, align 4
%inc.epil = add nuw i32 %i.011.epil, 1
%epil.iter.sub = add i32 %epil.iter, -1
%epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0
@@ -496,56 +496,56 @@ for.cond.cleanup: ; preds = %vector.body, %for.c
for.body: ; preds = %for.body, %for.body.preheader.new
%i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
%niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.3, %for.body ]
- %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.011
- %16 = load i8, i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, ptr %a, i32 %i.011
+ %16 = load i8, ptr %arrayidx, align 1
%conv = zext i8 %16 to i32
- %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.011
- %17 = load i8, i8* %arrayidx1, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %b, i32 %i.011
+ %17 = load i8, ptr %arrayidx1, align 1
%conv2 = zext i8 %17 to i32
%mul = mul nuw nsw i32 %conv2, %conv
%add = add nuw nsw i32 %mul, %conv3
- %arrayidx4 = getelementptr inbounds i32, i32* %res, i32 %i.011
- store i32 %add, i32* %arrayidx4, align 4
+ %arrayidx4 = getelementptr inbounds i32, ptr %res, i32 %i.011
+ store i32 %add, ptr %arrayidx4, align 4
%inc = or disjoint i32 %i.011, 1
- %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %inc
- %18 = load i8, i8* %arrayidx.1, align 1
+ %arrayidx.1 = getelementptr inbounds i8, ptr %a, i32 %inc
+ %18 = load i8, ptr %arrayidx.1, align 1
%conv.1 = zext i8 %18 to i32
- %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %inc
- %19 = load i8, i8* %arrayidx1.1, align 1
+ %arrayidx1.1 = getelementptr inbounds i8, ptr %b, i32 %inc
+ %19 = load i8, ptr %arrayidx1.1, align 1
%conv2.1 = zext i8 %19 to i32
%mul.1 = mul nuw nsw i32 %conv2.1, %conv.1
%add.1 = add nuw nsw i32 %mul.1, %conv3
- %arrayidx4.1 = getelementptr inbounds i32, i32* %res, i32 %inc
- store i32 %add.1, i32* %arrayidx4.1, align 4
+ %arrayidx4.1 = getelementptr inbounds i32, ptr %res, i32 %inc
+ store i32 %add.1, ptr %arrayidx4.1, align 4
%inc.1 = or disjoint i32 %i.011, 2
- %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %inc.1
- %20 = load i8, i8* %arrayidx.2, align 1
+ %arrayidx.2 = getelementptr inbounds i8, ptr %a, i32 %inc.1
+ %20 = load i8, ptr %arrayidx.2, align 1
%conv.2 = zext i8 %20 to i32
- %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %inc.1
- %21 = load i8, i8* %arrayidx1.2, align 1
+ %arrayidx1.2 = getelementptr inbounds i8, ptr %b, i32 %inc.1
+ %21 = load i8, ptr %arrayidx1.2, align 1
%conv2.2 = zext i8 %21 to i32
%mul.2 = mul nuw nsw i32 %conv2.2, %conv.2
%add.2 = add nuw nsw i32 %mul.2, %conv3
- %arrayidx4.2 = getelementptr inbounds i32, i32* %res, i32 %inc.1
- store i32 %add.2, i32* %arrayidx4.2, align 4
+ %arrayidx4.2 = getelementptr inbounds i32, ptr %res, i32 %inc.1
+ store i32 %add.2, ptr %arrayidx4.2, align 4
%inc.2 = or disjoint i32 %i.011, 3
- %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %inc.2
- %22 = load i8, i8* %arrayidx.3, align 1
+ %arrayidx.3 = getelementptr inbounds i8, ptr %a, i32 %inc.2
+ %22 = load i8, ptr %arrayidx.3, align 1
%conv.3 = zext i8 %22 to i32
- %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %inc.2
- %23 = load i8, i8* %arrayidx1.3, align 1
+ %arrayidx1.3 = getelementptr inbounds i8, ptr %b, i32 %inc.2
+ %23 = load i8, ptr %arrayidx1.3, align 1
%conv2.3 = zext i8 %23 to i32
%mul.3 = mul nuw nsw i32 %conv2.3, %conv.3
%add.3 = add nuw nsw i32 %mul.3, %conv3
- %arrayidx4.3 = getelementptr inbounds i32, i32* %res, i32 %inc.2
- store i32 %add.3, i32* %arrayidx4.3, align 4
+ %arrayidx4.3 = getelementptr inbounds i32, ptr %res, i32 %inc.2
+ store i32 %add.3, ptr %arrayidx4.3, align 4
%inc.3 = add nuw i32 %i.011, 4
%niter.nsub.3 = add i32 %niter, -4
%niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0
br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body
}
-define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(i16* nocapture readonly %a, i16* nocapture readonly %b, i16 signext %c, i32* nocapture %res, i32 %N) {
+define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(ptr nocapture readonly %a, ptr nocapture readonly %b, i16 signext %c, ptr nocapture %res, i32 %N) {
; CHECK-LABEL: test_vec_mul_scalar_add_short:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r4, lr}
@@ -578,20 +578,20 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i16, i16* %a, i32 %index
+ %0 = getelementptr inbounds i16, ptr %a, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i16* %0 to <4 x i16>*
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %2, i32 2, <4 x i1> %1, <4 x i16> undef)
%3 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %4 = getelementptr inbounds i16, i16* %b, i32 %index
- %5 = bitcast i16* %4 to <4 x i16>*
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %5, i32 2, <4 x i1> %1, <4 x i16> undef)
+ %4 = getelementptr inbounds i16, ptr %b, i32 %index
+ %5 = bitcast ptr %4 to ptr
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %5, i32 2, <4 x i1> %1, <4 x i16> undef)
%6 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
%7 = mul nsw <4 x i32> %6, %3
%8 = add nsw <4 x i32> %7, %broadcast.splat16
- %9 = getelementptr inbounds i32, i32* %res, i32 %index
- %10 = bitcast i32* %9 to <4 x i32>*
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %8, <4 x i32>* %10, i32 4, <4 x i1> %1)
+ %9 = getelementptr inbounds i32, ptr %res, i32 %index
+ %10 = bitcast ptr %9 to ptr
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %8, ptr %10, i32 4, <4 x i1> %1)
%index.next = add i32 %index, 4
%11 = icmp eq i32 %index.next, %n.vec
br i1 %11, label %for.cond.cleanup, label %vector.body
@@ -600,7 +600,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
-define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonly %a, i8* nocapture readonly %b, i8 zeroext %c, i32* nocapture %res, i32 %N) {
+define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(ptr nocapture readonly %a, ptr nocapture readonly %b, i8 zeroext %c, ptr nocapture %res, i32 %N) {
; CHECK-LABEL: test_vec_mul_scalar_add_uchar:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
@@ -687,21 +687,21 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
; CHECK-NEXT: .LBB7_11: @ %for.cond.cleanup
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
entry:
- %res12 = bitcast i32* %res to i8*
+ %res12 = bitcast ptr %res to ptr
%cmp10 = icmp eq i32 %N, 0
br i1 %cmp10, label %for.cond.cleanup, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
%conv3 = zext i8 %c to i32
- %scevgep = getelementptr i32, i32* %res, i32 %N
- %scevgep13 = bitcast i32* %scevgep to i8*
- %scevgep14 = getelementptr i8, i8* %a, i32 %N
- %scevgep15 = getelementptr i8, i8* %b, i32 %N
- %bound0 = icmp ugt i8* %scevgep14, %res12
- %bound1 = icmp ugt i8* %scevgep13, %a
+ %scevgep = getelementptr i32, ptr %res, i32 %N
+ %scevgep13 = bitcast ptr %scevgep to ptr
+ %scevgep14 = getelementptr i8, ptr %a, i32 %N
+ %scevgep15 = getelementptr i8, ptr %b, i32 %N
+ %bound0 = icmp ugt ptr %scevgep14, %res12
+ %bound1 = icmp ugt ptr %scevgep13, %a
%found.conflict = and i1 %bound0, %bound1
- %bound016 = icmp ugt i8* %scevgep15, %res12
- %bound117 = icmp ugt i8* %scevgep13, %b
+ %bound016 = icmp ugt ptr %scevgep15, %res12
+ %bound117 = icmp ugt ptr %scevgep13, %b
%found.conflict18 = and i1 %bound016, %bound117
%conflict.rdx = or i1 %found.conflict, %found.conflict18
br i1 %conflict.rdx, label %for.body.preheader, label %vector.ph
@@ -725,20 +725,20 @@ vector.ph: ; preds = %for.body.lr.ph
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %2 = getelementptr inbounds i8, i8* %a, i32 %index
+ %2 = getelementptr inbounds i8, ptr %a, i32 %index
%3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %4 = bitcast i8* %2 to <4 x i8>*
- %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %4, i32 1, <4 x i1> %3, <4 x i8> undef)
+ %4 = bitcast ptr %2 to ptr
+ %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %4, i32 1, <4 x i1> %3, <4 x i8> undef)
%5 = zext <4 x i8> %wide.masked.load to <4 x i32>
- %6 = getelementptr inbounds i8, i8* %b, i32 %index
- %7 = bitcast i8* %6 to <4 x i8>*
- %wide.masked.load21 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %7, i32 1, <4 x i1> %3, <4 x i8> undef)
+ %6 = getelementptr inbounds i8, ptr %b, i32 %index
+ %7 = bitcast ptr %6 to ptr
+ %wide.masked.load21 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %7, i32 1, <4 x i1> %3, <4 x i8> undef)
%8 = zext <4 x i8> %wide.masked.load21 to <4 x i32>
%9 = mul nuw nsw <4 x i32> %8, %5
%10 = add nuw nsw <4 x i32> %9, %broadcast.splat23
- %11 = getelementptr inbounds i32, i32* %res, i32 %index
- %12 = bitcast i32* %11 to <4 x i32>*
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %12, i32 4, <4 x i1> %3)
+ %11 = getelementptr inbounds i32, ptr %res, i32 %index
+ %12 = bitcast ptr %11 to ptr
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %12, i32 4, <4 x i1> %3)
%index.next = add i32 %index, 4
%13 = icmp eq i32 %index.next, %n.vec
br i1 %13, label %for.cond.cleanup, label %vector.body
@@ -751,16 +751,16 @@ for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.body, %for.body
for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.body.epil
%i.011.epil = phi i32 [ %inc.epil, %for.body.epil ], [ %i.011.unr, %for.cond.cleanup.loopexit.unr-lcssa ]
%epil.iter = phi i32 [ %epil.iter.sub, %for.body.epil ], [ %xtraiter, %for.cond.cleanup.loopexit.unr-lcssa ]
- %arrayidx.epil = getelementptr inbounds i8, i8* %a, i32 %i.011.epil
- %14 = load i8, i8* %arrayidx.epil, align 1
+ %arrayidx.epil = getelementptr inbounds i8, ptr %a, i32 %i.011.epil
+ %14 = load i8, ptr %arrayidx.epil, align 1
%conv.epil = zext i8 %14 to i32
- %arrayidx1.epil = getelementptr inbounds i8, i8* %b, i32 %i.011.epil
- %15 = load i8, i8* %arrayidx1.epil, align 1
+ %arrayidx1.epil = getelementptr inbounds i8, ptr %b, i32 %i.011.epil
+ %15 = load i8, ptr %arrayidx1.epil, align 1
%conv2.epil = zext i8 %15 to i32
%mul.epil = mul nuw nsw i32 %conv2.epil, %conv.epil
%add.epil = add nuw nsw i32 %mul.epil, %conv3
- %arrayidx4.epil = getelementptr inbounds i32, i32* %res, i32 %i.011.epil
- store i32 %add.epil, i32* %arrayidx4.epil, align 4
+ %arrayidx4.epil = getelementptr inbounds i32, ptr %res, i32 %i.011.epil
+ store i32 %add.epil, ptr %arrayidx4.epil, align 4
%inc.epil = add nuw i32 %i.011.epil, 1
%epil.iter.sub = add i32 %epil.iter, -1
%epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0
@@ -772,56 +772,56 @@ for.cond.cleanup: ; preds = %vector.body, %for.c
for.body: ; preds = %for.body, %for.body.preheader.new
%i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
%niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.3, %for.body ]
- %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.011
- %16 = load i8, i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, ptr %a, i32 %i.011
+ %16 = load i8, ptr %arrayidx, align 1
%conv = zext i8 %16 to i32
- %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.011
- %17 = load i8, i8* %arrayidx1, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %b, i32 %i.011
+ %17 = load i8, ptr %arrayidx1, align 1
%conv2 = zext i8 %17 to i32
%mul = mul nuw nsw i32 %conv2, %conv
%add = add nuw nsw i32 %mul, %conv3
- %arrayidx4 = getelementptr inbounds i32, i32* %res, i32 %i.011
- store i32 %add, i32* %arrayidx4, align 4
+ %arrayidx4 = getelementptr inbounds i32, ptr %res, i32 %i.011
+ store i32 %add, ptr %arrayidx4, align 4
%inc = or disjoint i32 %i.011, 1
- %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %inc
- %18 = load i8, i8* %arrayidx.1, align 1
+ %arrayidx.1 = getelementptr inbounds i8, ptr %a, i32 %inc
+ %18 = load i8, ptr %arrayidx.1, align 1
%conv.1 = zext i8 %18 to i32
- %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %inc
- %19 = load i8, i8* %arrayidx1.1, align 1
+ %arrayidx1.1 = getelementptr inbounds i8, ptr %b, i32 %inc
+ %19 = load i8, ptr %arrayidx1.1, align 1
%conv2.1 = zext i8 %19 to i32
%mul.1 = mul nuw nsw i32 %conv2.1, %conv.1
%add.1 = add nuw nsw i32 %mul.1, %conv3
- %arrayidx4.1 = getelementptr inbounds i32, i32* %res, i32 %inc
- store i32 %add.1, i32* %arrayidx4.1, align 4
+ %arrayidx4.1 = getelementptr inbounds i32, ptr %res, i32 %inc
+ store i32 %add.1, ptr %arrayidx4.1, align 4
%inc.1 = or disjoint i32 %i.011, 2
- %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %inc.1
- %20 = load i8, i8* %arrayidx.2, align 1
+ %arrayidx.2 = getelementptr inbounds i8, ptr %a, i32 %inc.1
+ %20 = load i8, ptr %arrayidx.2, align 1
%conv.2 = zext i8 %20 to i32
- %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %inc.1
- %21 = load i8, i8* %arrayidx1.2, align 1
+ %arrayidx1.2 = getelementptr inbounds i8, ptr %b, i32 %inc.1
+ %21 = load i8, ptr %arrayidx1.2, align 1
%conv2.2 = zext i8 %21 to i32
%mul.2 = mul nuw nsw i32 %conv2.2, %conv.2
%add.2 = add nuw nsw i32 %mul.2, %conv3
- %arrayidx4.2 = getelementptr inbounds i32, i32* %res, i32 %inc.1
- store i32 %add.2, i32* %arrayidx4.2, align 4
+ %arrayidx4.2 = getelementptr inbounds i32, ptr %res, i32 %inc.1
+ store i32 %add.2, ptr %arrayidx4.2, align 4
%inc.2 = or disjoint i32 %i.011, 3
- %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %inc.2
- %22 = load i8, i8* %arrayidx.3, align 1
+ %arrayidx.3 = getelementptr inbounds i8, ptr %a, i32 %inc.2
+ %22 = load i8, ptr %arrayidx.3, align 1
%conv.3 = zext i8 %22 to i32
- %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %inc.2
- %23 = load i8, i8* %arrayidx1.3, align 1
+ %arrayidx1.3 = getelementptr inbounds i8, ptr %b, i32 %inc.2
+ %23 = load i8, ptr %arrayidx1.3, align 1
%conv2.3 = zext i8 %23 to i32
%mul.3 = mul nuw nsw i32 %conv2.3, %conv.3
%add.3 = add nuw nsw i32 %mul.3, %conv3
- %arrayidx4.3 = getelementptr inbounds i32, i32* %res, i32 %inc.2
- store i32 %add.3, i32* %arrayidx4.3, align 4
+ %arrayidx4.3 = getelementptr inbounds i32, ptr %res, i32 %inc.2
+ store i32 %add.3, ptr %arrayidx4.3, align 4
%inc.3 = add nuw i32 %i.011, 4
%niter.nsub.3 = add i32 %niter, -4
%niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0
br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body
}
-define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(i16* nocapture readonly %a, i16* nocapture readonly %b, i16 signext %c, i32* nocapture %res, i32 %N) {
+define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(ptr nocapture readonly %a, ptr nocapture readonly %b, i16 signext %c, ptr nocapture %res, i32 %N) {
; CHECK-LABEL: test_vec_mul_scalar_add_ushort:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r4, lr}
@@ -854,20 +854,20 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i16, i16* %a, i32 %index
+ %0 = getelementptr inbounds i16, ptr %a, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i16* %0 to <4 x i16>*
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %2, i32 2, <4 x i1> %1, <4 x i16> undef)
%3 = zext <4 x i16> %wide.masked.load to <4 x i32>
- %4 = getelementptr inbounds i16, i16* %b, i32 %index
- %5 = bitcast i16* %4 to <4 x i16>*
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %5, i32 2, <4 x i1> %1, <4 x i16> undef)
+ %4 = getelementptr inbounds i16, ptr %b, i32 %index
+ %5 = bitcast ptr %4 to ptr
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %5, i32 2, <4 x i1> %1, <4 x i16> undef)
%6 = zext <4 x i16> %wide.masked.load14 to <4 x i32>
%7 = mul nuw nsw <4 x i32> %6, %3
%8 = add nsw <4 x i32> %7, %broadcast.splat16
- %9 = getelementptr inbounds i32, i32* %res, i32 %index
- %10 = bitcast i32* %9 to <4 x i32>*
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %8, <4 x i32>* %10, i32 4, <4 x i1> %1)
+ %9 = getelementptr inbounds i32, ptr %res, i32 %index
+ %10 = bitcast ptr %9 to ptr
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %8, ptr %10, i32 4, <4 x i1> %1)
%index.next = add i32 %index, 4
%11 = icmp eq i32 %index.next, %n.vec
br i1 %11, label %for.cond.cleanup, label %vector.body
@@ -876,7 +876,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
-define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %c, i32* nocapture %res, i32 %N) {
+define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %c, ptr nocapture %res, i32 %N) {
; CHECK-LABEL: test_vec_mul_scalar_add_int:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
@@ -969,14 +969,14 @@ entry:
br i1 %cmp8, label %for.cond.cleanup, label %vector.memcheck
vector.memcheck: ; preds = %entry
- %scevgep = getelementptr i32, i32* %res, i32 %N
- %scevgep13 = getelementptr i32, i32* %a, i32 %N
- %scevgep16 = getelementptr i32, i32* %b, i32 %N
- %bound0 = icmp ugt i32* %scevgep13, %res
- %bound1 = icmp ugt i32* %scevgep, %a
+ %scevgep = getelementptr i32, ptr %res, i32 %N
+ %scevgep13 = getelementptr i32, ptr %a, i32 %N
+ %scevgep16 = getelementptr i32, ptr %b, i32 %N
+ %bound0 = icmp ugt ptr %scevgep13, %res
+ %bound1 = icmp ugt ptr %scevgep, %a
%found.conflict = and i1 %bound0, %bound1
- %bound018 = icmp ugt i32* %scevgep16, %res
- %bound119 = icmp ugt i32* %scevgep, %b
+ %bound018 = icmp ugt ptr %scevgep16, %res
+ %bound119 = icmp ugt ptr %scevgep, %b
%found.conflict20 = and i1 %bound018, %bound119
%conflict.rdx = or i1 %found.conflict, %found.conflict20
br i1 %conflict.rdx, label %for.body.preheader, label %vector.ph
@@ -1000,18 +1000,18 @@ vector.ph: ; preds = %vector.memcheck
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %2 = getelementptr inbounds i32, i32* %a, i32 %index
+ %2 = getelementptr inbounds i32, ptr %a, i32 %index
%3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %4 = bitcast i32* %2 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %4, i32 4, <4 x i1> %3, <4 x i32> undef)
- %5 = getelementptr inbounds i32, i32* %b, i32 %index
- %6 = bitcast i32* %5 to <4 x i32>*
- %wide.masked.load23 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %6, i32 4, <4 x i1> %3, <4 x i32> undef)
+ %4 = bitcast ptr %2 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %4, i32 4, <4 x i1> %3, <4 x i32> undef)
+ %5 = getelementptr inbounds i32, ptr %b, i32 %index
+ %6 = bitcast ptr %5 to ptr
+ %wide.masked.load23 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %6, i32 4, <4 x i1> %3, <4 x i32> undef)
%7 = mul nsw <4 x i32> %wide.masked.load23, %wide.masked.load
%8 = add nsw <4 x i32> %7, %broadcast.splat25
- %9 = getelementptr inbounds i32, i32* %res, i32 %index
- %10 = bitcast i32* %9 to <4 x i32>*
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %8, <4 x i32>* %10, i32 4, <4 x i1> %3)
+ %9 = getelementptr inbounds i32, ptr %res, i32 %index
+ %10 = bitcast ptr %9 to ptr
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %8, ptr %10, i32 4, <4 x i1> %3)
%index.next = add i32 %index, 4
%11 = icmp eq i32 %index.next, %n.vec
br i1 %11, label %for.cond.cleanup, label %vector.body
@@ -1024,14 +1024,14 @@ for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.body, %for.body
for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.body.epil
%i.09.epil = phi i32 [ %inc.epil, %for.body.epil ], [ %i.09.unr, %for.cond.cleanup.loopexit.unr-lcssa ]
%epil.iter = phi i32 [ %epil.iter.sub, %for.body.epil ], [ %xtraiter, %for.cond.cleanup.loopexit.unr-lcssa ]
- %arrayidx.epil = getelementptr inbounds i32, i32* %a, i32 %i.09.epil
- %12 = load i32, i32* %arrayidx.epil, align 4
- %arrayidx1.epil = getelementptr inbounds i32, i32* %b, i32 %i.09.epil
- %13 = load i32, i32* %arrayidx1.epil, align 4
+ %arrayidx.epil = getelementptr inbounds i32, ptr %a, i32 %i.09.epil
+ %12 = load i32, ptr %arrayidx.epil, align 4
+ %arrayidx1.epil = getelementptr inbounds i32, ptr %b, i32 %i.09.epil
+ %13 = load i32, ptr %arrayidx1.epil, align 4
%mul.epil = mul nsw i32 %13, %12
%add.epil = add nsw i32 %mul.epil, %c
- %arrayidx2.epil = getelementptr inbounds i32, i32* %res, i32 %i.09.epil
- store i32 %add.epil, i32* %arrayidx2.epil, align 4
+ %arrayidx2.epil = getelementptr inbounds i32, ptr %res, i32 %i.09.epil
+ store i32 %add.epil, ptr %arrayidx2.epil, align 4
%inc.epil = add nuw i32 %i.09.epil, 1
%epil.iter.sub = add i32 %epil.iter, -1
%epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0
@@ -1043,48 +1043,48 @@ for.cond.cleanup: ; preds = %vector.body, %for.c
for.body: ; preds = %for.body, %for.body.preheader.new
%i.09 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
%niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.3, %for.body ]
- %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.09
- %14 = load i32, i32* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.09
- %15 = load i32, i32* %arrayidx1, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.09
+ %14 = load i32, ptr %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, ptr %b, i32 %i.09
+ %15 = load i32, ptr %arrayidx1, align 4
%mul = mul nsw i32 %15, %14
%add = add nsw i32 %mul, %c
- %arrayidx2 = getelementptr inbounds i32, i32* %res, i32 %i.09
- store i32 %add, i32* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds i32, ptr %res, i32 %i.09
+ store i32 %add, ptr %arrayidx2, align 4
%inc = or disjoint i32 %i.09, 1
- %arrayidx.1 = getelementptr inbounds i32, i32* %a, i32 %inc
- %16 = load i32, i32* %arrayidx.1, align 4
- %arrayidx1.1 = getelementptr inbounds i32, i32* %b, i32 %inc
- %17 = load i32, i32* %arrayidx1.1, align 4
+ %arrayidx.1 = getelementptr inbounds i32, ptr %a, i32 %inc
+ %16 = load i32, ptr %arrayidx.1, align 4
+ %arrayidx1.1 = getelementptr inbounds i32, ptr %b, i32 %inc
+ %17 = load i32, ptr %arrayidx1.1, align 4
%mul.1 = mul nsw i32 %17, %16
%add.1 = add nsw i32 %mul.1, %c
- %arrayidx2.1 = getelementptr inbounds i32, i32* %res, i32 %inc
- store i32 %add.1, i32* %arrayidx2.1, align 4
+ %arrayidx2.1 = getelementptr inbounds i32, ptr %res, i32 %inc
+ store i32 %add.1, ptr %arrayidx2.1, align 4
%inc.1 = or disjoint i32 %i.09, 2
- %arrayidx.2 = getelementptr inbounds i32, i32* %a, i32 %inc.1
- %18 = load i32, i32* %arrayidx.2, align 4
- %arrayidx1.2 = getelementptr inbounds i32, i32* %b, i32 %inc.1
- %19 = load i32, i32* %arrayidx1.2, align 4
+ %arrayidx.2 = getelementptr inbounds i32, ptr %a, i32 %inc.1
+ %18 = load i32, ptr %arrayidx.2, align 4
+ %arrayidx1.2 = getelementptr inbounds i32, ptr %b, i32 %inc.1
+ %19 = load i32, ptr %arrayidx1.2, align 4
%mul.2 = mul nsw i32 %19, %18
%add.2 = add nsw i32 %mul.2, %c
- %arrayidx2.2 = getelementptr inbounds i32, i32* %res, i32 %inc.1
- store i32 %add.2, i32* %arrayidx2.2, align 4
+ %arrayidx2.2 = getelementptr inbounds i32, ptr %res, i32 %inc.1
+ store i32 %add.2, ptr %arrayidx2.2, align 4
%inc.2 = or disjoint i32 %i.09, 3
- %arrayidx.3 = getelementptr inbounds i32, i32* %a, i32 %inc.2
- %20 = load i32, i32* %arrayidx.3, align 4
- %arrayidx1.3 = getelementptr inbounds i32, i32* %b, i32 %inc.2
- %21 = load i32, i32* %arrayidx1.3, align 4
+ %arrayidx.3 = getelementptr inbounds i32, ptr %a, i32 %inc.2
+ %20 = load i32, ptr %arrayidx.3, align 4
+ %arrayidx1.3 = getelementptr inbounds i32, ptr %b, i32 %inc.2
+ %21 = load i32, ptr %arrayidx1.3, align 4
%mul.3 = mul nsw i32 %21, %20
%add.3 = add nsw i32 %mul.3, %c
- %arrayidx2.3 = getelementptr inbounds i32, i32* %res, i32 %inc.2
- store i32 %add.3, i32* %arrayidx2.3, align 4
+ %arrayidx2.3 = getelementptr inbounds i32, ptr %res, i32 %inc.2
+ store i32 %add.3, ptr %arrayidx2.3, align 4
%inc.3 = add nuw i32 %i.09, 4
%niter.nsub.3 = add i32 %niter, -4
%niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0
br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body
}
-define dso_local arm_aapcs_vfpcc void @test_v8i8_to_v8i16(i16* noalias nocapture %a, i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) {
+define dso_local arm_aapcs_vfpcc void @test_v8i8_to_v8i16(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
; CHECK-LABEL: test_v8i8_to_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
@@ -1113,19 +1113,19 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i8, i8* %b, i32 %index
+ %0 = getelementptr inbounds i8, ptr %b, i32 %index
%1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N)
- %2 = bitcast i8* %0 to <8 x i8>*
- %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %2, i32 1, <8 x i1> %1, <8 x i8> undef)
%3 = zext <8 x i8> %wide.masked.load to <8 x i16>
- %4 = getelementptr inbounds i8, i8* %c, i32 %index
- %5 = bitcast i8* %4 to <8 x i8>*
- %wide.masked.load14 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %5, i32 1, <8 x i1> %1, <8 x i8> undef)
+ %4 = getelementptr inbounds i8, ptr %c, i32 %index
+ %5 = bitcast ptr %4 to ptr
+ %wide.masked.load14 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %5, i32 1, <8 x i1> %1, <8 x i8> undef)
%6 = zext <8 x i8> %wide.masked.load14 to <8 x i16>
%7 = mul nuw <8 x i16> %6, %3
- %8 = getelementptr inbounds i16, i16* %a, i32 %index
- %9 = bitcast i16* %8 to <8 x i16>*
- call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %7, <8 x i16>* %9, i32 2, <8 x i1> %1)
+ %8 = getelementptr inbounds i16, ptr %a, i32 %index
+ %9 = bitcast ptr %8 to ptr
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> %7, ptr %9, i32 2, <8 x i1> %1)
%index.next = add i32 %index, 8
%10 = icmp eq i32 %index.next, %n.vec
br i1 %10, label %for.cond.cleanup, label %vector.body
@@ -1134,12 +1134,12 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
-declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)
-declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
-declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
-declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
-declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>)
+declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>)
+declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir
index b599829a0cfc6..0c50a954ddfd0 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir
@@ -3,68 +3,68 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=-lob %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-NOLOB
--- |
- %struct.head_s = type { %struct.head_s*, %struct.data_s* }
+ %struct.head_s = type { ptr, ptr }
%struct.data_s = type { i16, i16 }
- define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr {
+ define dso_local arm_aapcscc ptr @search(ptr readonly %list, ptr nocapture readonly %info) local_unnamed_addr {
entry:
- %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1
- %0 = load i16, i16* %idx, align 2
+ %idx = getelementptr inbounds %struct.data_s, ptr %info, i32 0, i32 1
+ %0 = load i16, ptr %idx, align 2
%cmp = icmp sgt i16 %0, -1
br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader
while.cond9.preheader: ; preds = %entry
- %1 = icmp eq %struct.head_s* %list, null
+ %1 = icmp eq ptr %list, null
br i1 %1, label %return, label %land.rhs11.lr.ph
land.rhs11.lr.ph: ; preds = %while.cond9.preheader
- %data16143 = bitcast %struct.data_s* %info to i16*
- %2 = load i16, i16* %data16143, align 2
+ %data16143 = bitcast ptr %info to ptr
+ %2 = load i16, ptr %data16143, align 2
%conv15 = sext i16 %2 to i32
br label %land.rhs11
while.cond.preheader: ; preds = %entry
- %3 = icmp eq %struct.head_s* %list, null
+ %3 = icmp eq ptr %list, null
br i1 %3, label %return, label %land.rhs.preheader
land.rhs.preheader: ; preds = %while.cond.preheader
br label %land.rhs
land.rhs: ; preds = %land.rhs.preheader, %while.body
- %list.addr.033 = phi %struct.head_s* [ %6, %while.body ], [ %list, %land.rhs.preheader ]
- %info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1
- %4 = load %struct.data_s*, %struct.data_s** %info2, align 4
- %idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %4, i32 0, i32 1
- %5 = load i16, i16* %idx3, align 2
+ %list.addr.033 = phi ptr [ %6, %while.body ], [ %list, %land.rhs.preheader ]
+ %info2 = getelementptr inbounds %struct.head_s, ptr %list.addr.033, i32 0, i32 1
+ %4 = load ptr, ptr %info2, align 4
+ %idx3 = getelementptr inbounds %struct.data_s, ptr %4, i32 0, i32 1
+ %5 = load i16, ptr %idx3, align 2
%cmp7 = icmp eq i16 %5, %0
br i1 %cmp7, label %return, label %while.body
while.body: ; preds = %land.rhs
- %next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s**
- %6 = load %struct.head_s*, %struct.head_s** %next4, align 4
- %tobool = icmp ne %struct.head_s* %6, null
+ %next4 = bitcast ptr %list.addr.033 to ptr
+ %6 = load ptr, ptr %next4, align 4
+ %tobool = icmp ne ptr %6, null
br i1 %tobool, label %return, label %land.rhs
land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph
- %list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ]
- %info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1
- %7 = load %struct.data_s*, %struct.data_s** %info12, align 4
- %data165 = bitcast %struct.data_s* %7 to i16*
- %8 = load i16, i16* %data165, align 2
+ %list.addr.136 = phi ptr [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ]
+ %info12 = getelementptr inbounds %struct.head_s, ptr %list.addr.136, i32 0, i32 1
+ %7 = load ptr, ptr %info12, align 4
+ %data165 = bitcast ptr %7 to ptr
+ %8 = load i16, ptr %data165, align 2
%9 = and i16 %8, 255
%and = zext i16 %9 to i32
%cmp16 = icmp eq i32 %and, %conv15
br i1 %cmp16, label %return, label %while.body19
while.body19: ; preds = %land.rhs11
- %next206 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s**
- %10 = load %struct.head_s*, %struct.head_s** %next206, align 4
- %tobool10 = icmp eq %struct.head_s* %10, null
+ %next206 = bitcast ptr %list.addr.136 to ptr
+ %10 = load ptr, ptr %next206, align 4
+ %tobool10 = icmp eq ptr %10, null
br i1 %tobool10, label %return, label %land.rhs11
return: ; preds = %while.body19, %land.rhs11, %while.body, %land.rhs, %while.cond.preheader, %while.cond9.preheader
- %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ null, %while.body ], [ %list.addr.033, %land.rhs ], [ null, %while.body19 ], [ %list.addr.136, %land.rhs11 ]
- ret %struct.head_s* %retval.0
+ %retval.0 = phi ptr [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ null, %while.body ], [ %list.addr.033, %land.rhs ], [ null, %while.body19 ], [ %list.addr.136, %land.rhs11 ]
+ ret ptr %retval.0
}
...
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir
index 465d080b3af6b..0ea3b26903f3a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir
@@ -2,68 +2,68 @@
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-cp-islands %s -o - --verify-machineinstrs | FileCheck %s
--- |
- %struct.head_s = type { %struct.head_s*, %struct.data_s* }
+ %struct.head_s = type { ptr, ptr }
%struct.data_s = type { i16, i16 }
- define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr {
+ define dso_local arm_aapcscc ptr @search(ptr readonly %list, ptr nocapture readonly %info) local_unnamed_addr {
entry:
- %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1
- %tmp = load i16, i16* %idx, align 2
+ %idx = getelementptr inbounds %struct.data_s, ptr %info, i32 0, i32 1
+ %tmp = load i16, ptr %idx, align 2
%cmp = icmp sgt i16 %tmp, -1
br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader
while.cond9.preheader: ; preds = %entry
- %0 = icmp eq %struct.head_s* %list, null
+ %0 = icmp eq ptr %list, null
br i1 %0, label %return, label %land.rhs11.lr.ph
land.rhs11.lr.ph: ; preds = %while.cond9.preheader
- %data16143 = bitcast %struct.data_s* %info to i16*
- %tmp1 = load i16, i16* %data16143, align 2
+ %data16143 = bitcast ptr %info to ptr
+ %tmp1 = load i16, ptr %data16143, align 2
%conv15 = sext i16 %tmp1 to i32
br label %land.rhs11
while.cond.preheader: ; preds = %entry
- %1 = icmp eq %struct.head_s* %list, null
+ %1 = icmp eq ptr %list, null
br i1 %1, label %return, label %land.rhs.preheader
land.rhs.preheader: ; preds = %while.cond.preheader
br label %land.rhs
while.body: ; preds = %land.rhs
- %next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s**
- %tmp4 = load %struct.head_s*, %struct.head_s** %next4, align 4
- %tobool = icmp eq %struct.head_s* %tmp4, null
+ %next4 = bitcast ptr %list.addr.033 to ptr
+ %tmp4 = load ptr, ptr %next4, align 4
+ %tobool = icmp eq ptr %tmp4, null
br i1 %tobool, label %return, label %land.rhs
land.rhs: ; preds = %land.rhs.preheader, %while.body
- %list.addr.033 = phi %struct.head_s* [ %tmp4, %while.body ], [ %list, %land.rhs.preheader ]
- %info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1
- %tmp2 = load %struct.data_s*, %struct.data_s** %info2, align 4
- %idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %tmp2, i32 0, i32 1
- %tmp3 = load i16, i16* %idx3, align 2
+ %list.addr.033 = phi ptr [ %tmp4, %while.body ], [ %list, %land.rhs.preheader ]
+ %info2 = getelementptr inbounds %struct.head_s, ptr %list.addr.033, i32 0, i32 1
+ %tmp2 = load ptr, ptr %info2, align 4
+ %idx3 = getelementptr inbounds %struct.data_s, ptr %tmp2, i32 0, i32 1
+ %tmp3 = load i16, ptr %idx3, align 2
%cmp7 = icmp eq i16 %tmp3, %tmp
br i1 %cmp7, label %return, label %while.body
while.body19: ; preds = %land.rhs11
- %next205 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s**
- %tmp8 = load %struct.head_s*, %struct.head_s** %next205, align 4
- %tobool10 = icmp eq %struct.head_s* %tmp8, null
+ %next205 = bitcast ptr %list.addr.136 to ptr
+ %tmp8 = load ptr, ptr %next205, align 4
+ %tobool10 = icmp eq ptr %tmp8, null
br i1 %tobool10, label %return, label %land.rhs11
land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph
- %list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %tmp8, %while.body19 ]
- %info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1
- %tmp5 = load %struct.data_s*, %struct.data_s** %info12, align 4
- %data166 = bitcast %struct.data_s* %tmp5 to i16*
- %tmp6 = load i16, i16* %data166, align 2
+ %list.addr.136 = phi ptr [ %list, %land.rhs11.lr.ph ], [ %tmp8, %while.body19 ]
+ %info12 = getelementptr inbounds %struct.head_s, ptr %list.addr.136, i32 0, i32 1
+ %tmp5 = load ptr, ptr %info12, align 4
+ %data166 = bitcast ptr %tmp5 to ptr
+ %tmp6 = load i16, ptr %data166, align 2
%2 = and i16 %tmp6, 255
%and = zext i16 %2 to i32
%cmp16 = icmp eq i32 %and, %conv15
br i1 %cmp16, label %return, label %while.body19
return: ; preds = %land.rhs11, %while.body19, %land.rhs, %while.body, %while.cond.preheader, %while.cond9.preheader
- %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ null, %while.body ], [ %list.addr.033, %land.rhs ], [ null, %while.body19 ], [ %list.addr.136, %land.rhs11 ]
- ret %struct.head_s* %retval.0
+ %retval.0 = phi ptr [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ null, %while.body ], [ %list.addr.033, %land.rhs ], [ null, %while.body19 ], [ %list.addr.136, %land.rhs11 ]
+ ret ptr %retval.0
}
...
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir
index 3d53b0acf8b98..87694e3c39327 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir
@@ -6,69 +6,69 @@
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-unknown-unknown"
- %struct.head_s = type { %struct.head_s*, %struct.data_s* }
+ %struct.head_s = type { ptr, ptr }
%struct.data_s = type { i16, i16 }
; Function Attrs: norecurse nounwind readonly
- define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr {
+ define dso_local arm_aapcscc ptr @search(ptr readonly %list, ptr nocapture readonly %info) local_unnamed_addr {
entry:
- %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1
- %0 = load i16, i16* %idx, align 2
+ %idx = getelementptr inbounds %struct.data_s, ptr %info, i32 0, i32 1
+ %0 = load i16, ptr %idx, align 2
%cmp = icmp sgt i16 %0, -1
br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader
while.cond9.preheader: ; preds = %entry
- %1 = icmp eq %struct.head_s* %list, null
+ %1 = icmp eq ptr %list, null
br i1 %1, label %return, label %land.rhs11.lr.ph
land.rhs11.lr.ph: ; preds = %while.cond9.preheader
- %data16143 = bitcast %struct.data_s* %info to i16*
- %2 = load i16, i16* %data16143, align 2
+ %data16143 = bitcast ptr %info to ptr
+ %2 = load i16, ptr %data16143, align 2
%conv15 = sext i16 %2 to i32
br label %land.rhs11
while.cond.preheader: ; preds = %entry
- %3 = icmp eq %struct.head_s* %list, null
+ %3 = icmp eq ptr %list, null
br i1 %3, label %return, label %land.rhs.preheader
land.rhs.preheader: ; preds = %while.cond.preheader
br label %land.rhs
land.rhs: ; preds = %land.rhs.preheader, %while.body
- %list.addr.033 = phi %struct.head_s* [ %6, %while.body ], [ %list, %land.rhs.preheader ]
- %info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1
- %4 = load %struct.data_s*, %struct.data_s** %info2, align 4
- %idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %4, i32 0, i32 1
- %5 = load i16, i16* %idx3, align 2
+ %list.addr.033 = phi ptr [ %6, %while.body ], [ %list, %land.rhs.preheader ]
+ %info2 = getelementptr inbounds %struct.head_s, ptr %list.addr.033, i32 0, i32 1
+ %4 = load ptr, ptr %info2, align 4
+ %idx3 = getelementptr inbounds %struct.data_s, ptr %4, i32 0, i32 1
+ %5 = load i16, ptr %idx3, align 2
%cmp7 = icmp eq i16 %5, %0
br i1 %cmp7, label %return, label %while.body
while.body: ; preds = %land.rhs
- %next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s**
- %6 = load %struct.head_s*, %struct.head_s** %next4, align 4
- %tobool = icmp eq %struct.head_s* %6, null
+ %next4 = bitcast ptr %list.addr.033 to ptr
+ %6 = load ptr, ptr %next4, align 4
+ %tobool = icmp eq ptr %6, null
br i1 %tobool, label %return, label %land.rhs
land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph
- %list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ]
- %info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1
- %7 = load %struct.data_s*, %struct.data_s** %info12, align 4
- %data165 = bitcast %struct.data_s* %7 to i16*
- %8 = load i16, i16* %data165, align 2
+ %list.addr.136 = phi ptr [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ]
+ %info12 = getelementptr inbounds %struct.head_s, ptr %list.addr.136, i32 0, i32 1
+ %7 = load ptr, ptr %info12, align 4
+ %data165 = bitcast ptr %7 to ptr
+ %8 = load i16, ptr %data165, align 2
%9 = and i16 %8, 255
%and = zext i16 %9 to i32
%cmp16 = icmp eq i32 %and, %conv15
br i1 %cmp16, label %return, label %while.body19
while.body19: ; preds = %land.rhs11
- %next206 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s**
- %10 = load %struct.head_s*, %struct.head_s** %next206, align 4
- %tobool10 = icmp eq %struct.head_s* %10, null
+ %next206 = bitcast ptr %list.addr.136 to ptr
+ %10 = load ptr, ptr %next206, align 4
+ %tobool10 = icmp eq ptr %10, null
br i1 %tobool10, label %return, label %land.rhs11
return: ; preds = %while.body19, %land.rhs11, %while.body, %land.rhs, %while.cond.preheader, %while.cond9.preheader
- %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ]
- ret %struct.head_s* %retval.0
+ %retval.0 = phi ptr [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ]
+ ret ptr %retval.0
}
...
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir
index 7d898653caab0..e445598f1a31a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir
@@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define dso_local i32 @no_vpsel_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+ define dso_local i32 @no_vpsel_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -18,22 +18,22 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
%tmp12 = mul nsw <4 x i32> %tmp11, %tmp10
%tmp13 = add <4 x i32> %tmp12, %vec.phi
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4
%tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp15 = icmp ne i32 %tmp14, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -47,7 +47,7 @@
%res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp16, %middle.block ]
ret i32 %res.0.lcssa
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
index e0a483047744f..14019372e0e80 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve %s -run-pass=arm-low-overhead-loops -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc zeroext i8 @non_masked_load(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) {
+ define dso_local arm_aapcs_vfpcc zeroext i8 @non_masked_load(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) {
entry:
%cmp11 = icmp eq i32 %N, 0
%0 = add i32 %N, 15
@@ -20,21 +20,21 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv20 = phi i8* [ %scevgep21, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv20 = phi ptr [ %scevgep21, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <16 x i8> [ zeroinitializer, %vector.ph ], [ %13, %vector.body ]
%8 = phi i32 [ %start, %vector.ph ], [ %14, %vector.body ]
%9 = phi i32 [ %N, %vector.ph ], [ %11, %vector.body ]
- %lsr.iv2022 = bitcast i8* %lsr.iv20 to <16 x i8>*
- %lsr.iv19 = bitcast i8* %lsr.iv to <16 x i8>*
+ %lsr.iv2022 = bitcast ptr %lsr.iv20 to ptr
+ %lsr.iv19 = bitcast ptr %lsr.iv to ptr
%10 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %9)
%11 = sub i32 %9, 16
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv19, i32 1, <16 x i1> %10, <16 x i8> undef)
- %wide.load16 = load <16 x i8>, <16 x i8>* %lsr.iv2022
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv19, i32 1, <16 x i1> %10, <16 x i8> undef)
+ %wide.load16 = load <16 x i8>, ptr %lsr.iv2022
%12 = add <16 x i8> %wide.masked.load, %vec.phi
%13 = add <16 x i8> %12, %wide.load16
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
- %scevgep21 = getelementptr i8, i8* %lsr.iv20, i32 16
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 16
+ %scevgep21 = getelementptr i8, ptr %lsr.iv20, i32 16
%14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %8, i32 1)
%15 = icmp ne i32 %14, 0
br i1 %15, label %vector.body, label %middle.block
@@ -52,7 +52,7 @@
ret i8 %res.0.lcssa
}
- declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #1
+ declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) #1
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir
index 3ee066a9ffe2e..d0959c0d491f2 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc void @non_masked_store(i8* noalias nocapture %res, i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) {
+ define dso_local arm_aapcs_vfpcc void @non_masked_store(ptr noalias nocapture %res, ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) {
entry:
%cmp10 = icmp eq i32 %N, 0
%0 = add i32 %N, 15
@@ -18,23 +18,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv19 = phi i8* [ %scevgep20, %vector.body ], [ %res, %vector.ph ]
- %lsr.iv16 = phi i8* [ %scevgep17, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv19 = phi ptr [ %scevgep20, %vector.body ], [ %res, %vector.ph ]
+ %lsr.iv16 = phi ptr [ %scevgep17, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv1921 = bitcast i8* %lsr.iv19 to <16 x i8>*
- %lsr.iv1618 = bitcast i8* %lsr.iv16 to <16 x i8>*
- %lsr.iv15 = bitcast i8* %lsr.iv to <16 x i8>*
+ %lsr.iv1921 = bitcast ptr %lsr.iv19 to ptr
+ %lsr.iv1618 = bitcast ptr %lsr.iv16 to ptr
+ %lsr.iv15 = bitcast ptr %lsr.iv to ptr
%8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %7)
%9 = sub i32 %7, 16
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv15, i32 1, <16 x i1> %8, <16 x i8> undef)
- %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv1618, i32 1, <16 x i1> %8, <16 x i8> undef)
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv15, i32 1, <16 x i1> %8, <16 x i8> undef)
+ %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv1618, i32 1, <16 x i1> %8, <16 x i8> undef)
%10 = add <16 x i8> %wide.masked.load14, %wide.masked.load
- store <16 x i8> %10, <16 x i8>* %lsr.iv1921
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
- %scevgep17 = getelementptr i8, i8* %lsr.iv16, i32 16
- %scevgep20 = getelementptr i8, i8* %lsr.iv19, i32 16
+ store <16 x i8> %10, ptr %lsr.iv1921
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 16
+ %scevgep17 = getelementptr i8, ptr %lsr.iv16, i32 16
+ %scevgep20 = getelementptr i8, ptr %lsr.iv19, i32 16
%11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %for.cond.cleanup
@@ -43,8 +43,8 @@
ret void
}
- declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
- declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
+ declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>)
+ declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir
index 31b7ee2c4dad1..911e1d607a9d5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define dso_local <4 x i32> @invariant_predicated_add_use(i16* nocapture readonly %a, i32* %c, i32 %N, <4 x i32> %pass) #0 {
+ define dso_local <4 x i32> @invariant_predicated_add_use(ptr nocapture readonly %a, ptr %c, i32 %N, <4 x i32> %pass) #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -19,15 +19,15 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
%acc.next = tail call <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32> %pass, <4 x i32> %tmp10, <4 x i1> %tmp8, <4 x i32> undef)
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
%tmp11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp12 = icmp ne i32 %tmp11, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -38,7 +38,7 @@
ret <4 x i32> %res
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll
index c5d63a2f081a6..9194d7842a6d3 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -O3 -tail-predication=force-enabled-no-reductions %s -o - | FileCheck %s
-define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) {
+define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(ptr %pSrc, i32 %blockSize, ptr nocapture %pResult) {
; CHECK-LABEL: arm_max_no_idx_f32_mve:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -24,12 +24,12 @@ entry:
do.body: ; preds = %do.body, %entry
%blockSize.addr.0 = phi i32 [ %blockSize, %entry ], [ %sub, %do.body ]
%curExtremValVec.0 = phi <4 x float> [ <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>, %entry ], [ %3, %do.body ]
- %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
+ %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blockSize.addr.0)
- %1 = bitcast float* %pSrc.addr.0 to <4 x float>*
- %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %1 = bitcast ptr %pSrc.addr.0 to ptr
+ %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
%3 = tail call fast <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> %curExtremValVec.0, i32 0, <4 x i1> %0, <4 x float> %curExtremValVec.0)
- %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4
+ %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4
%sub = add i32 %blockSize.addr.0, -4
%cmp = icmp sgt i32 %sub, 0
br i1 %cmp, label %do.body, label %do.end
@@ -40,6 +40,6 @@ do.end: ; preds = %do.body
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>)
declare <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float>, <4 x float>, i32, <4 x i1>, <4 x float>)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir
index 13ba3594a22bf..35cc7dd731801 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s --verify-machineinstrs -o - | FileCheck %s
--- |
- define i16 @predicated_livout(i8* %input_1_vect, i8* %input_2_vect, i32 %block_size) #0 {
+ define i16 @predicated_livout(ptr %input_1_vect, ptr %input_2_vect, i32 %block_size) #0 {
entry:
%rnd.up = add i32 %block_size, 7
%div = lshr i32 %rnd.up, 3
@@ -14,21 +14,21 @@
for.body: ; preds = %for.body.preheader, %for.body
%lsr.iv = phi i32 [ 0, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
- %input_1_vect.addr.052 = phi i8* [ %add.ptr, %for.body ], [ %input_1_vect, %for.body.preheader ]
- %input_2_vect.addr.051 = phi i8* [ %add.ptr14, %for.body ], [ %input_2_vect, %for.body.preheader ]
+ %input_1_vect.addr.052 = phi ptr [ %add.ptr, %for.body ], [ %input_1_vect, %for.body.preheader ]
+ %input_2_vect.addr.051 = phi ptr [ %add.ptr14, %for.body ], [ %input_2_vect, %for.body.preheader ]
%num_elements.049 = phi i32 [ %sub, %for.body ], [ %block_size, %for.body.preheader ]
%acc = phi <8 x i16> [ %acc.next, %for.body ], [ zeroinitializer, %for.body.preheader ]
- %input_2_cast = bitcast i8* %input_2_vect.addr.051 to <8 x i8>*
- %input_1_cast = bitcast i8* %input_1_vect.addr.052 to <8 x i8>*
+ %input_2_cast = bitcast ptr %input_2_vect.addr.051 to ptr
+ %input_1_cast = bitcast ptr %input_1_vect.addr.052 to ptr
%pred = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %num_elements.049)
- %load.1 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %input_1_cast, i32 1, <8 x i1> %pred, <8 x i8> undef)
+ %load.1 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %input_1_cast, i32 1, <8 x i1> %pred, <8 x i8> undef)
%zext.load.1 = zext <8 x i8> %load.1 to <8 x i16>
- %load.2 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %input_2_cast, i32 1, <8 x i1> %pred, <8 x i8> undef)
+ %load.2 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %input_2_cast, i32 1, <8 x i1> %pred, <8 x i8> undef)
%zext.load.2 = zext <8 x i8> %load.2 to <8 x i16>
%add = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %zext.load.1, <8 x i16> %zext.load.2, <8 x i1> %pred, <8 x i16> undef)
%acc.next = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %add, <8 x i16> %acc, <8 x i1> %pred, <8 x i16> undef)
- %add.ptr = getelementptr inbounds i8, i8* %input_1_vect.addr.052, i32 8
- %add.ptr14 = getelementptr inbounds i8, i8* %input_2_vect.addr.051, i32 8
+ %add.ptr = getelementptr inbounds i8, ptr %input_1_vect.addr.052, i32 8
+ %add.ptr14 = getelementptr inbounds i8, ptr %input_2_vect.addr.051, i32 8
%sub = add i32 %num_elements.049, -8
%iv.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1)
%cmp = icmp ne i32 %iv.next, 0
@@ -44,7 +44,7 @@
}
declare <8 x i1> @llvm.arm.mve.vctp16(i32) #1
- declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) #2
+ declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>) #2
declare i1 @llvm.test.set.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir
index 4ac6c60764e12..3ef1569829cac 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc i32 @mul_var_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) {
+ define dso_local arm_aapcs_vfpcc i32 @mul_var_i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) {
entry:
%cmp9.not = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -18,24 +18,24 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv14 = phi i8* [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv13 = bitcast i8* %lsr.iv to <4 x i8>*
- %lsr.iv1416 = bitcast i8* %lsr.iv14 to <4 x i8>*
+ %lsr.iv13 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %lsr.iv13, i32 1, <4 x i1> %8, <4 x i8> undef)
+ %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %lsr.iv13, i32 1, <4 x i1> %8, <4 x i8> undef)
%10 = zext <4 x i8> %wide.masked.load to <4 x i32>
- %wide.masked.load12 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %lsr.iv1416, i32 1, <4 x i1> %8, <4 x i8> undef)
+ %wide.masked.load12 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %lsr.iv1416, i32 1, <4 x i1> %8, <4 x i8> undef)
%11 = zext <4 x i8> %wide.masked.load12 to <4 x i32>
%12 = mul nuw nsw <4 x i32> %11, %10
%13 = select <4 x i1> %8, <4 x i32> %12, <4 x i32> zeroinitializer
%14 = add <4 x i32> %vec.phi, %13
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 4
- %scevgep15 = getelementptr i8, i8* %lsr.iv14, i32 4
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 4
+ %scevgep15 = getelementptr i8, ptr %lsr.iv14, i32 4
%15 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1)
%16 = icmp ne i32 %15, 0
br i1 %16, label %vector.body, label %middle.block
@@ -49,7 +49,7 @@
ret i32 %res.0.lcssa
}
- define dso_local arm_aapcs_vfpcc i32 @add_var_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) {
+ define dso_local arm_aapcs_vfpcc i32 @add_var_i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) {
entry:
%cmp10.not = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -65,24 +65,24 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv15 = phi i8* [ %scevgep16, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv15 = phi ptr [ %scevgep16, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv14 = bitcast i8* %lsr.iv to <4 x i8>*
- %lsr.iv1517 = bitcast i8* %lsr.iv15 to <4 x i8>*
+ %lsr.iv14 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1517 = bitcast ptr %lsr.iv15 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %lsr.iv14, i32 1, <4 x i1> %8, <4 x i8> undef)
+ %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %lsr.iv14, i32 1, <4 x i1> %8, <4 x i8> undef)
%10 = zext <4 x i8> %wide.masked.load to <4 x i32>
- %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %lsr.iv1517, i32 1, <4 x i1> %8, <4 x i8> undef)
+ %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %lsr.iv1517, i32 1, <4 x i1> %8, <4 x i8> undef)
%11 = zext <4 x i8> %wide.masked.load13 to <4 x i32>
%12 = add <4 x i32> %vec.phi, %10
%13 = add <4 x i32> %12, %11
%14 = select <4 x i1> %8, <4 x i32> %13, <4 x i32> %vec.phi
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 4
- %scevgep16 = getelementptr i8, i8* %lsr.iv15, i32 4
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 4
+ %scevgep16 = getelementptr i8, ptr %lsr.iv15, i32 4
%15 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1)
%16 = icmp ne i32 %15, 0
br i1 %16, label %vector.body, label %middle.block
@@ -96,7 +96,7 @@
ret i32 %res.0.lcssa
}
- define dso_local arm_aapcs_vfpcc i32 @mul_var_i16(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) {
+ define dso_local arm_aapcs_vfpcc i32 @mul_var_i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) {
entry:
%cmp9.not = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -112,24 +112,24 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv14 = phi i16* [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv13 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1416 = bitcast i16* %lsr.iv14 to <4 x i16>*
+ %lsr.iv13 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv13, i32 2, <4 x i1> %8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv13, i32 2, <4 x i1> %8, <4 x i16> undef)
%10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load12 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1416, i32 2, <4 x i1> %8, <4 x i16> undef)
+ %wide.masked.load12 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1416, i32 2, <4 x i1> %8, <4 x i16> undef)
%11 = sext <4 x i16> %wide.masked.load12 to <4 x i32>
%12 = mul nsw <4 x i32> %11, %10
%13 = select <4 x i1> %8, <4 x i32> %12, <4 x i32> zeroinitializer
%14 = add <4 x i32> %vec.phi, %13
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep15 = getelementptr i16, i16* %lsr.iv14, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep15 = getelementptr i16, ptr %lsr.iv14, i32 4
%15 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1)
%16 = icmp ne i32 %15, 0
br i1 %16, label %vector.body, label %middle.block
@@ -143,7 +143,7 @@
ret i32 %res.0.lcssa
}
- define dso_local arm_aapcs_vfpcc i32 @add_var_i16(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) {
+ define dso_local arm_aapcs_vfpcc i32 @add_var_i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) {
entry:
%cmp10.not = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -159,24 +159,24 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv15 = phi i16* [ %scevgep16, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv15 = phi ptr [ %scevgep16, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv14 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1517 = bitcast i16* %lsr.iv15 to <4 x i16>*
+ %lsr.iv14 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1517 = bitcast ptr %lsr.iv15 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv14, i32 2, <4 x i1> %8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv14, i32 2, <4 x i1> %8, <4 x i16> undef)
%10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load13 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1517, i32 2, <4 x i1> %8, <4 x i16> undef)
+ %wide.masked.load13 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1517, i32 2, <4 x i1> %8, <4 x i16> undef)
%11 = sext <4 x i16> %wide.masked.load13 to <4 x i32>
%12 = add <4 x i32> %vec.phi, %10
%13 = add <4 x i32> %12, %11
%14 = select <4 x i1> %8, <4 x i32> %13, <4 x i32> %vec.phi
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep16 = getelementptr i16, i16* %lsr.iv15, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep16 = getelementptr i16, ptr %lsr.iv15, i32 4
%15 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1)
%16 = icmp ne i32 %15, 0
br i1 %16, label %vector.body, label %middle.block
@@ -191,7 +191,7 @@
}
; Function Attrs: norecurse nounwind readonly
- define dso_local arm_aapcs_vfpcc i32 @mul_var_i32(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+ define dso_local arm_aapcs_vfpcc i32 @mul_var_i32(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
entry:
%cmp8.not = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -207,22 +207,22 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv13 = phi i32* [ %scevgep14, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv13 = phi ptr [ %scevgep14, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv12 = bitcast i32* %lsr.iv to <4 x i32>*
- %lsr.iv1315 = bitcast i32* %lsr.iv13 to <4 x i32>*
+ %lsr.iv12 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1315 = bitcast ptr %lsr.iv13 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv12, i32 4, <4 x i1> %8, <4 x i32> undef)
- %wide.masked.load11 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1315, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv12, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load11 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1315, i32 4, <4 x i1> %8, <4 x i32> undef)
%10 = mul nsw <4 x i32> %wide.masked.load11, %wide.masked.load
%11 = select <4 x i1> %8, <4 x i32> %10, <4 x i32> zeroinitializer
%12 = add <4 x i32> %vec.phi, %11
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep14 = getelementptr i32, i32* %lsr.iv13, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep14 = getelementptr i32, ptr %lsr.iv13, i32 4
%13 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1)
%14 = icmp ne i32 %13, 0
br i1 %14, label %vector.body, label %middle.block
@@ -237,7 +237,7 @@
}
; Function Attrs: norecurse nounwind readonly
- define dso_local arm_aapcs_vfpcc i32 @add_var_i32(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+ define dso_local arm_aapcs_vfpcc i32 @add_var_i32(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9.not = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -253,22 +253,22 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
- %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
+ %lsr.iv13 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 4
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
- %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
%10 = add <4 x i32> %wide.masked.load, %vec.phi
%11 = add <4 x i32> %10, %wide.masked.load12
%12 = select <4 x i1> %8, <4 x i32> %11, <4 x i32> %vec.phi
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4
%13 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1)
%14 = icmp ne i32 %13, 0
br i1 %14, label %vector.body, label %middle.block
@@ -282,9 +282,9 @@
ret i32 %res.0.lcssa
}
- declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
+ declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>)
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
index 93119eac2d564..3c4af10b12438 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s
-define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
+define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr {
; CHECK-LABEL: one_loop_add_add_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
@@ -35,13 +35,13 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <16 x i8> [ zeroinitializer, %vector.ph ], [ %i5, %vector.body ]
- %i = getelementptr inbounds i8, i8* %a, i32 %index
+ %i = getelementptr inbounds i8, ptr %a, i32 %index
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N)
- %i1 = bitcast i8* %i to <16 x i8>*
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
- %i2 = getelementptr inbounds i8, i8* %b, i32 %index
- %i3 = bitcast i8* %i2 to <16 x i8>*
- %wide.masked.load16 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %i1 = bitcast ptr %i to ptr
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %i2 = getelementptr inbounds i8, ptr %b, i32 %index
+ %i3 = bitcast ptr %i2 to ptr
+ %wide.masked.load16 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
%i4 = add <16 x i8> %wide.masked.load, %wide.masked.load16
%i5 = select <16 x i1> %active.lane.mask, <16 x i8> %i4, <16 x i8> %vec.phi
%i6 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %i5)
@@ -57,7 +57,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i8 %res.0.lcssa
}
-define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
+define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr {
; CHECK-LABEL: one_loop_add_add_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -103,14 +103,14 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %i7, %vector.body ]
- %i = getelementptr inbounds i8, i8* %a, i32 %index
+ %i = getelementptr inbounds i8, ptr %a, i32 %index
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N)
- %i1 = bitcast i8* %i to <8 x i8>*
- %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
+ %i1 = bitcast ptr %i to ptr
+ %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
%i2 = zext <8 x i8> %wide.masked.load to <8 x i16>
- %i3 = getelementptr inbounds i8, i8* %b, i32 %index
- %i4 = bitcast i8* %i3 to <8 x i8>*
- %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
+ %i3 = getelementptr inbounds i8, ptr %b, i32 %index
+ %i4 = bitcast ptr %i3 to ptr
+ %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
%i5 = zext <8 x i8> %wide.masked.load17 to <8 x i16>
%i6 = add <8 x i16> %vec.phi, %i2
%i7 = add <8 x i16> %i6, %i5
@@ -128,7 +128,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i16 %res.0.lcssa
}
-define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_sub_add_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
+define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_sub_add_v16i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr {
; CHECK-LABEL: one_loop_sub_add_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -174,13 +174,13 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <16 x i8> [ zeroinitializer, %vector.ph ], [ %i5, %vector.body ]
- %i = getelementptr inbounds i8, i8* %a, i32 %index
+ %i = getelementptr inbounds i8, ptr %a, i32 %index
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N)
- %i1 = bitcast i8* %i to <16 x i8>*
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
- %i2 = getelementptr inbounds i8, i8* %b, i32 %index
- %i3 = bitcast i8* %i2 to <16 x i8>*
- %wide.masked.load16 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %i1 = bitcast ptr %i to ptr
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %i2 = getelementptr inbounds i8, ptr %b, i32 %index
+ %i3 = bitcast ptr %i2 to ptr
+ %wide.masked.load16 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
%i4 = sub <16 x i8> %wide.masked.load, %wide.masked.load16
%i5 = add <16 x i8> %i4, %vec.phi
%index.next = add i32 %index, 16
@@ -197,7 +197,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i8 %res.0.lcssa
}
-define dso_local arm_aapcs_vfpcc signext i16 @one_loop_sub_add_v8i16(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
+define dso_local arm_aapcs_vfpcc signext i16 @one_loop_sub_add_v8i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr {
; CHECK-LABEL: one_loop_sub_add_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -243,14 +243,14 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %i7, %vector.body ]
- %i = getelementptr inbounds i8, i8* %a, i32 %index
+ %i = getelementptr inbounds i8, ptr %a, i32 %index
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N)
- %i1 = bitcast i8* %i to <8 x i8>*
- %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
+ %i1 = bitcast ptr %i to ptr
+ %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
%i2 = zext <8 x i8> %wide.masked.load to <8 x i16>
- %i3 = getelementptr inbounds i8, i8* %b, i32 %index
- %i4 = bitcast i8* %i3 to <8 x i8>*
- %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
+ %i3 = getelementptr inbounds i8, ptr %b, i32 %index
+ %i4 = bitcast ptr %i3 to ptr
+ %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
%i5 = zext <8 x i8> %wide.masked.load17 to <8 x i16>
%i6 = sub <8 x i16> %i5, %i2
%i7 = add <8 x i16> %i6, %vec.phi
@@ -268,7 +268,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i16 %res.0.lcssa
}
-define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_mul_add_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
+define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_mul_add_v16i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr {
; CHECK-LABEL: one_loop_mul_add_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -314,13 +314,13 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <16 x i8> [ zeroinitializer, %vector.ph ], [ %i5, %vector.body ]
- %i = getelementptr inbounds i8, i8* %a, i32 %index
+ %i = getelementptr inbounds i8, ptr %a, i32 %index
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N)
- %i1 = bitcast i8* %i to <16 x i8>*
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
- %i2 = getelementptr inbounds i8, i8* %b, i32 %index
- %i3 = bitcast i8* %i2 to <16 x i8>*
- %wide.masked.load15 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %i1 = bitcast ptr %i to ptr
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %i2 = getelementptr inbounds i8, ptr %b, i32 %index
+ %i3 = bitcast ptr %i2 to ptr
+ %wide.masked.load15 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
%i4 = mul <16 x i8> %wide.masked.load15, %wide.masked.load
%i5 = add <16 x i8> %i4, %vec.phi
%index.next = add i32 %index, 16
@@ -337,7 +337,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i8 %res.0.lcssa
}
-define dso_local arm_aapcs_vfpcc signext i16 @one_loop_mul_add_v8i16(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
+define dso_local arm_aapcs_vfpcc signext i16 @one_loop_mul_add_v8i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr {
; CHECK-LABEL: one_loop_mul_add_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -383,14 +383,14 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %i7, %vector.body ]
- %i = getelementptr inbounds i8, i8* %a, i32 %index
+ %i = getelementptr inbounds i8, ptr %a, i32 %index
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N)
- %i1 = bitcast i8* %i to <8 x i8>*
- %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
+ %i1 = bitcast ptr %i to ptr
+ %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
%i2 = zext <8 x i8> %wide.masked.load to <8 x i16>
- %i3 = getelementptr inbounds i8, i8* %b, i32 %index
- %i4 = bitcast i8* %i3 to <8 x i8>*
- %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
+ %i3 = getelementptr inbounds i8, ptr %b, i32 %index
+ %i4 = bitcast ptr %i3 to ptr
+ %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
%i5 = zext <8 x i8> %wide.masked.load17 to <8 x i16>
%i6 = mul <8 x i16> %i5, %i2
%i7 = add <8 x i16> %i6, %vec.phi
@@ -408,7 +408,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i16 %res.0.lcssa
}
-define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
+define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr {
; CHECK-LABEL: two_loops_mul_add_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -479,14 +479,14 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %i7, %vector.body ]
- %i = getelementptr inbounds i8, i8* %a, i32 %index
+ %i = getelementptr inbounds i8, ptr %a, i32 %index
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %i1 = bitcast i8* %i to <4 x i8>*
- %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %i1, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
+ %i1 = bitcast ptr %i to ptr
+ %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %i1, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
%i2 = zext <4 x i8> %wide.masked.load to <4 x i32>
- %i3 = getelementptr inbounds i8, i8* %b, i32 %index
- %i4 = bitcast i8* %i3 to <4 x i8>*
- %wide.masked.load43 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %i4, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
+ %i3 = getelementptr inbounds i8, ptr %b, i32 %index
+ %i4 = bitcast ptr %i3 to ptr
+ %wide.masked.load43 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %i4, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
%i5 = zext <4 x i8> %wide.masked.load43 to <4 x i32>
%i6 = mul nuw nsw <4 x i32> %i5, %i2
%i7 = add <4 x i32> %i6, %vec.phi
@@ -508,14 +508,14 @@ vector.ph47: ; preds = %middle.block
vector.body46: ; preds = %vector.body46, %vector.ph47
%index51 = phi i32 [ 0, %vector.ph47 ], [ %index.next52, %vector.body46 ]
%vec.phi60 = phi <4 x i32> [ %i11, %vector.ph47 ], [ %i19, %vector.body46 ]
- %i12 = getelementptr inbounds i8, i8* %a, i32 %index51
+ %i12 = getelementptr inbounds i8, ptr %a, i32 %index51
%active.lane.mask61 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index51, i32 %N)
- %i13 = bitcast i8* %i12 to <4 x i8>*
- %wide.masked.load62 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %i13, i32 1, <4 x i1> %active.lane.mask61, <4 x i8> undef)
+ %i13 = bitcast ptr %i12 to ptr
+ %wide.masked.load62 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %i13, i32 1, <4 x i1> %active.lane.mask61, <4 x i8> undef)
%i14 = zext <4 x i8> %wide.masked.load62 to <4 x i32>
- %i15 = getelementptr inbounds i8, i8* %b, i32 %index51
- %i16 = bitcast i8* %i15 to <4 x i8>*
- %wide.masked.load63 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %i16, i32 1, <4 x i1> %active.lane.mask61, <4 x i8> undef)
+ %i15 = getelementptr inbounds i8, ptr %b, i32 %index51
+ %i16 = bitcast ptr %i15 to ptr
+ %wide.masked.load63 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %i16, i32 1, <4 x i1> %active.lane.mask61, <4 x i8> undef)
%i17 = zext <4 x i8> %wide.masked.load63 to <4 x i32>
%i18 = mul nuw nsw <4 x i32> %i17, %i14
%i19 = add <4 x i32> %i18, %vec.phi60
@@ -533,7 +533,7 @@ for.cond.cleanup7: ; preds = %middle.block44, %mi
ret i32 %res.1.lcssa
}
-define dso_local arm_aapcs_vfpcc void @two_reductions_mul_add_v8i16(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
+define dso_local arm_aapcs_vfpcc void @two_reductions_mul_add_v8i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr {
; CHECK-LABEL: two_reductions_mul_add_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r4, lr}
@@ -591,14 +591,14 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %i8, %vector.body ]
%vec.phi.1 = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %i9, %vector.body ]
- %i = getelementptr inbounds i8, i8* %a, i32 %index
+ %i = getelementptr inbounds i8, ptr %a, i32 %index
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N)
- %i1 = bitcast i8* %i to <8 x i8>*
- %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
+ %i1 = bitcast ptr %i to ptr
+ %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
%i2 = zext <8 x i8> %wide.masked.load to <8 x i16>
- %i3 = getelementptr inbounds i8, i8* %b, i32 %index
- %i4 = bitcast i8* %i3 to <8 x i8>*
- %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
+ %i3 = getelementptr inbounds i8, ptr %b, i32 %index
+ %i4 = bitcast ptr %i3 to ptr
+ %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
%i5 = zext <8 x i8> %wide.masked.load17 to <8 x i16>
%i6 = mul <8 x i16> %i5, %i2
%i7 = sub <8 x i16> %i5, %i2
@@ -619,15 +619,15 @@ for.cond.cleanup: ; preds = %middle.block, %entr
%res.0.lcssa = phi i16 [ 0, %entry ], [ %i12, %middle.block ]
%res.1.lcssa = phi i16 [ 0, %entry ], [ %i14, %middle.block ]
%trunc.res.0 = trunc i16 %res.0.lcssa to i8
- store i8 %trunc.res.0, i8* %a
+ store i8 %trunc.res.0, ptr %a
%trunc.res.1 = trunc i16 %res.1.lcssa to i8
- store i8 %trunc.res.1, i8* %b
+ store i8 %trunc.res.1, ptr %b
ret void
}
%struct.date = type { i32, i32, i32, i32 }
@days = internal unnamed_addr constant [2 x [13 x i32]] [[13 x i32] [i32 0, i32 31, i32 28, i32 31, i32 30, i32 31, i32 30, i32 31, i32 31, i32 30, i32 31, i32 30, i32 31], [13 x i32] [i32 0, i32 31, i32 29, i32 31, i32 30, i32 31, i32 30, i32 31, i32 31, i32 30, i32 31, i32 30, i32 31]], align 4
-define i32 @wrongop(%struct.date* nocapture readonly %pd) {
+define i32 @wrongop(ptr nocapture readonly %pd) {
; CHECK-LABEL: wrongop:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r4, lr}
@@ -682,10 +682,10 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) {
; CHECK-NEXT: vaddv.u32 r0, q0
; CHECK-NEXT: pop {r4, pc}
entry:
- %day1 = getelementptr inbounds %struct.date, %struct.date* %pd, i32 0, i32 0
- %0 = load i32, i32* %day1, align 4
- %year = getelementptr inbounds %struct.date, %struct.date* %pd, i32 0, i32 2
- %1 = load i32, i32* %year, align 4
+ %day1 = getelementptr inbounds %struct.date, ptr %pd, i32 0, i32 0
+ %0 = load i32, ptr %day1, align 4
+ %year = getelementptr inbounds %struct.date, ptr %pd, i32 0, i32 2
+ %1 = load i32, ptr %year, align 4
%2 = and i32 %1, 3
%cmp = icmp ne i32 %2, 0
%rem3 = srem i32 %1, 100
@@ -701,8 +701,8 @@ lor.rhs: ; preds = %entry
lor.end: ; preds = %entry, %lor.rhs
%3 = phi i32 [ %phi.cast, %lor.rhs ], [ 1, %entry ]
- %month = getelementptr inbounds %struct.date, %struct.date* %pd, i32 0, i32 1
- %4 = load i32, i32* %month, align 4
+ %month = getelementptr inbounds %struct.date, ptr %pd, i32 0, i32 1
+ %4 = load i32, ptr %month, align 4
%cmp820 = icmp sgt i32 %4, 0
br i1 %cmp820, label %vector.ph, label %for.end
@@ -715,10 +715,10 @@ vector.ph: ; preds = %lor.end
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ %5, %vector.ph ], [ %8, %vector.body ]
- %6 = getelementptr inbounds [2 x [13 x i32]], [2 x [13 x i32]]* @days, i32 0, i32 %3, i32 %index
+ %6 = getelementptr inbounds [2 x [13 x i32]], ptr @days, i32 0, i32 %3, i32 %index
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %4)
- %7 = bitcast i32* %6 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* nonnull %7, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %7 = bitcast ptr %6 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull %7, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%8 = add <4 x i32> %wide.masked.load, %vec.phi
%index.next = add i32 %index, 4
%9 = icmp eq i32 %index.next, %n.vec
@@ -734,13 +734,13 @@ for.end: ; preds = %middle.block, %lor.
ret i32 %day.0.lcssa
}
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
-declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
-declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
-declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)
+declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir
index 616c16bda2fd4..eec1c3973923f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir
@@ -13,7 +13,7 @@
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
- define void @header_not_target_unrolled_loop(i32* nocapture %v, i32 %n) {
+ define void @header_not_target_unrolled_loop(ptr nocapture %v, i32 %n) {
entry:
%cmp56 = icmp sgt i32 %n, 1
br i1 %cmp56, label %for.cond1.preheader.preheader, label %for.end20
@@ -36,13 +36,13 @@
for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
%tmp2 = mul i32 %gap.057, -4
%tmp6 = mul i32 %gap.057, -2
- %scevgep1 = getelementptr i32, i32* %v, i32 %gap.057
+ %scevgep1 = getelementptr i32, ptr %v, i32 %gap.057
%0 = shl i32 %gap.057, 2
br label %for.cond4.preheader
for.cond4.preheader: ; preds = %for.inc16, %for.cond4.preheader.preheader
- %lsr.iv2 = phi i32* [ %scevgep3, %for.inc16 ], [ %scevgep1, %for.cond4.preheader.preheader ]
- %lsr.iv = phi i32* [ %v, %for.cond4.preheader.preheader ], [ %scevgep, %for.inc16 ]
+ %lsr.iv2 = phi ptr [ %scevgep3, %for.inc16 ], [ %scevgep1, %for.cond4.preheader.preheader ]
+ %lsr.iv = phi ptr [ %v, %for.cond4.preheader.preheader ], [ %scevgep, %for.inc16 ]
%i.053 = phi i32 [ %inc, %for.inc16 ], [ %gap.057, %for.cond4.preheader.preheader ]
%tmp8 = phi i32 [ %start, %for.cond4.preheader.preheader ], [ %tmp16, %for.inc16 ]
%j.048 = sub nsw i32 %i.053, %gap.057
@@ -55,26 +55,26 @@
land.rhs: ; preds = %land.rhs.preheader, %for.body8
%lsr.iv4 = phi i32 [ %lsr.iv.next, %for.body8 ], [ 0, %land.rhs.preheader ]
%j.051 = phi i32 [ %j.0, %for.body8 ], [ %j.048, %land.rhs.preheader ]
- %1 = bitcast i32* %lsr.iv2 to i8*
- %2 = bitcast i32* %lsr.iv to i8*
- %uglygep3 = getelementptr i8, i8* %2, i32 %lsr.iv4
- %uglygep34 = bitcast i8* %uglygep3 to i32*
- %tmp9 = load i32, i32* %uglygep34, align 4
- %uglygep1 = getelementptr i8, i8* %1, i32 %lsr.iv4
- %uglygep12 = bitcast i8* %uglygep1 to i32*
- %tmp12 = load i32, i32* %uglygep12, align 4
+ %1 = bitcast ptr %lsr.iv2 to ptr
+ %2 = bitcast ptr %lsr.iv to ptr
+ %uglygep3 = getelementptr i8, ptr %2, i32 %lsr.iv4
+ %uglygep34 = bitcast ptr %uglygep3 to ptr
+ %tmp9 = load i32, ptr %uglygep34, align 4
+ %uglygep1 = getelementptr i8, ptr %1, i32 %lsr.iv4
+ %uglygep12 = bitcast ptr %uglygep1 to ptr
+ %tmp12 = load i32, ptr %uglygep12, align 4
%cmp7 = icmp sgt i32 %tmp9, %tmp12
br i1 %cmp7, label %for.body8, label %for.inc16
for.body8: ; preds = %land.rhs
- %3 = bitcast i32* %lsr.iv2 to i8*
- %4 = bitcast i32* %lsr.iv to i8*
- %sunkaddr = getelementptr i8, i8* %4, i32 %lsr.iv4
- %5 = bitcast i8* %sunkaddr to i32*
- store i32 %tmp12, i32* %5, align 4
- %uglygep = getelementptr i8, i8* %3, i32 %lsr.iv4
- %uglygep6 = bitcast i8* %uglygep to i32*
- store i32 %tmp9, i32* %uglygep6, align 4
+ %3 = bitcast ptr %lsr.iv2 to ptr
+ %4 = bitcast ptr %lsr.iv to ptr
+ %sunkaddr = getelementptr i8, ptr %4, i32 %lsr.iv4
+ %5 = bitcast ptr %sunkaddr to ptr
+ store i32 %tmp12, ptr %5, align 4
+ %uglygep = getelementptr i8, ptr %3, i32 %lsr.iv4
+ %uglygep6 = bitcast ptr %uglygep to ptr
+ store i32 %tmp9, ptr %uglygep6, align 4
%j.0 = sub nsw i32 %j.051, %gap.057
%lsr.iv.next = add i32 %lsr.iv4, %0
%cmp5 = icmp sgt i32 %j.0, -1
@@ -82,10 +82,10 @@
for.inc16: ; preds = %for.body8, %land.rhs, %for.cond4.preheader
%inc = add nsw i32 %i.053, 1
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 1
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 1
%tmp16 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp8, i32 1)
%tmp17 = icmp ne i32 %tmp16, 0
- %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 1
+ %scevgep3 = getelementptr i32, ptr %lsr.iv2, i32 1
br i1 %tmp17, label %for.cond4.preheader, label %for.cond.loopexit
for.end20: ; preds = %for.cond.loopexit, %entry
@@ -99,7 +99,7 @@
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
; Function Attrs: nounwind
- declare void @llvm.stackprotector(i8*, i8**) #1
+ declare void @llvm.stackprotector(ptr, ptr) #1
attributes #0 = { noduplicate nounwind }
attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir
index 5966df967bb64..12bc894814605 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir
@@ -1,12 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc void @skip_debug(i32* nocapture %a, i16* nocapture readonly %b, i32 %N) !dbg !8 {
+ define dso_local arm_aapcs_vfpcc void @skip_debug(ptr nocapture %a, ptr nocapture readonly %b, i32 %N) !dbg !8 {
entry:
- call void @llvm.dbg.value(metadata i32* %a, metadata !17, metadata !DIExpression()), !dbg !23
- call void @llvm.dbg.value(metadata i16* %b, metadata !18, metadata !DIExpression()), !dbg !23
+ call void @llvm.dbg.value(metadata ptr %a, metadata !17, metadata !DIExpression()), !dbg !23
+ call void @llvm.dbg.value(metadata ptr %b, metadata !18, metadata !DIExpression()), !dbg !23
call void @llvm.dbg.value(metadata i32 %N, metadata !19, metadata !DIExpression()), !dbg !23
- %0 = load i32, i32* %a, align 4, !dbg !24
+ %0 = load i32, ptr %a, align 4, !dbg !24
call void @llvm.dbg.value(metadata i32 %0, metadata !20, metadata !DIExpression()), !dbg !23
call void @llvm.dbg.value(metadata i32 0, metadata !21, metadata !DIExpression()), !dbg !29
%cmp7 = icmp eq i32 %N, 0, !dbg !30
@@ -26,17 +26,17 @@
br label %vector.body, !dbg !32
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %b, %vector.ph ], !dbg !33
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %b, %vector.ph ], !dbg !33
%vec.phi = phi <4 x i32> [ %7, %vector.ph ], [ %15, %vector.body ]
%10 = phi i32 [ %start, %vector.ph ], [ %16, %vector.body ]
%11 = phi i32 [ %N, %vector.ph ], [ %13, %vector.body ]
- %lsr.iv14 = bitcast i16* %lsr.iv to <4 x i16>*
+ %lsr.iv14 = bitcast ptr %lsr.iv to ptr
%12 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %11), !dbg !34
%13 = sub i32 %11, 4, !dbg !34
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv14, i32 2, <4 x i1> %12, <4 x i16> undef), !dbg !34
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv14, i32 2, <4 x i1> %12, <4 x i16> undef), !dbg !34
%14 = sext <4 x i16> %wide.masked.load to <4 x i32>, !dbg !34
%15 = sub <4 x i32> %vec.phi, %14, !dbg !38
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4, !dbg !33
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4, !dbg !33
%16 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %10, i32 1), !dbg !33
%17 = icmp ne i32 %16, 0, !dbg !33
br i1 %17, label %vector.body, label %middle.block, !dbg !33
@@ -52,12 +52,12 @@
for.cond.cleanup: ; preds = %middle.block, %entry
%temp.0.lcssa = phi i32 [ %0, %entry ], [ %20, %middle.block ], !dbg !23
call void @llvm.dbg.value(metadata i32 %temp.0.lcssa, metadata !20, metadata !DIExpression()), !dbg !23
- store i32 %temp.0.lcssa, i32* %a, align 4, !dbg !42
+ store i32 %temp.0.lcssa, ptr %a, align 4, !dbg !42
ret void, !dbg !43
}
declare void @llvm.dbg.value(metadata, metadata, metadata)
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir
index 1bfe8f9d289cd..6c9cd153e03cd 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir
@@ -7,11 +7,11 @@
target triple = "thumbv8.1m.main-arm-none-eabihf"
; Function Attrs: nofree norecurse nounwind optsize
- define hidden void @arm_max_no_idx_f32(float* nocapture readonly %pSrc, i32 %blockSize, float* nocapture %pResult) local_unnamed_addr #0 !dbg !13 {
+ define hidden void @arm_max_no_idx_f32(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocapture %pResult) local_unnamed_addr #0 !dbg !13 {
entry:
- call void @llvm.dbg.value(metadata float* %pSrc, metadata !24, metadata !DIExpression()), !dbg !29
+ call void @llvm.dbg.value(metadata ptr %pSrc, metadata !24, metadata !DIExpression()), !dbg !29
call void @llvm.dbg.value(metadata i32 %blockSize, metadata !25, metadata !DIExpression()), !dbg !29
- call void @llvm.dbg.value(metadata float* %pResult, metadata !26, metadata !DIExpression()), !dbg !29
+ call void @llvm.dbg.value(metadata ptr %pResult, metadata !26, metadata !DIExpression()), !dbg !29
call void @llvm.dbg.value(metadata float 0x3810000000000000, metadata !27, metadata !DIExpression()), !dbg !29
%cmp.not7 = icmp eq i32 %blockSize, 0, !dbg !30
br i1 %cmp.not7, label %while.end, label %vector.ph, !dbg !31
@@ -26,18 +26,18 @@
br label %vector.body, !dbg !31
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv1 = phi float* [ %scevgep, %vector.body ], [ %pSrc, %vector.ph ]
+ %lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %pSrc, %vector.ph ]
%vec.phi = phi <4 x float> [ <float 0x3810000000000000, float 0x3810000000000000, float 0x3810000000000000, float 0x3810000000000000>, %vector.ph ], [ %10, %vector.body ]
%4 = phi i32 [ %3, %vector.ph ], [ %11, %vector.body ]
%5 = phi i32 [ %blockSize, %vector.ph ], [ %7, %vector.body ]
- %lsr.iv12 = bitcast float* %lsr.iv1 to <4 x float>*
+ %lsr.iv12 = bitcast ptr %lsr.iv1 to ptr
%6 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %5)
%7 = sub i32 %5, 4
- %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv12, i32 4, <4 x i1> %6, <4 x float> poison), !dbg !32, !tbaa !34
+ %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv12, i32 4, <4 x i1> %6, <4 x float> poison), !dbg !32, !tbaa !34
%8 = fcmp nnan ninf nsz olt <4 x float> %vec.phi, %wide.masked.load, !dbg !38
%9 = and <4 x i1> %6, %8, !dbg !40
%10 = select <4 x i1> %9, <4 x float> %wide.masked.load, <4 x float> %vec.phi, !dbg !40
- %scevgep = getelementptr float, float* %lsr.iv1, i32 4
+ %scevgep = getelementptr float, ptr %lsr.iv1, i32 4
%11 = call i32 @llvm.loop.decrement.reg.i32(i32 %4, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %middle.block, !llvm.loop !41
@@ -48,7 +48,7 @@
while.end: ; preds = %middle.block, %entry
%maxValue.0.lcssa = phi float [ 0x3810000000000000, %entry ], [ %13, %middle.block ], !dbg !29
- store float %maxValue.0.lcssa, float* %pResult, align 4, !dbg !45, !tbaa !34
+ store float %maxValue.0.lcssa, ptr %pResult, align 4, !dbg !45, !tbaa !34
ret void, !dbg !46
}
@@ -59,7 +59,7 @@
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #2
; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
- declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #3
+ declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #3
; Function Attrs: nofree nosync nounwind readnone willreturn
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) #2
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir
index 3b142e7ba2d41..046b5bf2f349a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir
@@ -5,7 +5,7 @@
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-none-eabi"
- define i32 @test(i8* nocapture readnone %x, i32* noalias %y, i32 %n, <4 x i32> %0, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %p) {
+ define i32 @test(ptr nocapture readnone %x, ptr noalias %y, i32 %n, <4 x i32> %0, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %p) {
entry:
%cmp13 = icmp sgt i32 %n, 0
br i1 %cmp13, label %while.body.preheader, label %while.end
@@ -20,15 +20,15 @@
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
- %y.addr.016 = phi i32* [ %add.ptr, %while.body ], [ %y, %while.body.preheader ]
+ %y.addr.016 = phi ptr [ %add.ptr, %while.body ], [ %y, %while.body.preheader ]
%s.015 = phi <4 x i32> [ %mul, %while.body ], [ <i32 1, i32 1, i32 1, i32 1>, %while.body.preheader ]
%n.addr.014 = phi i32 [ %12, %while.body ], [ %n, %while.body.preheader ]
%9 = phi i32 [ %8, %while.body.preheader ], [ %13, %while.body ]
- %y.addr.0161 = bitcast i32* %y.addr.016 to <4 x i32>*
+ %y.addr.0161 = bitcast ptr %y.addr.016 to ptr
%10 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.014)
- %11 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %y.addr.0161, i32 4, <4 x i1> %10, <4 x i32> zeroinitializer)
+ %11 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %y.addr.0161, i32 4, <4 x i1> %10, <4 x i32> zeroinitializer)
%mul = mul <4 x i32> %11, %s.015
- %add.ptr = getelementptr inbounds i32, i32* %y.addr.016, i32 4
+ %add.ptr = getelementptr inbounds i32, ptr %y.addr.016, i32 4
%12 = add i32 %n.addr.014, -4
%13 = call i32 @llvm.loop.decrement.reg.i32(i32 %9, i32 1)
%14 = icmp ne i32 %13, 0
@@ -43,7 +43,7 @@
}
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2
declare i32 @llvm.smin.i32(i32, i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #4
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
index 7777d7a6894a2..c355f8f20e5af 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s
-define dso_local arm_aapcs_vfpcc i32 @minmaxval4(i32* nocapture readonly %x, i32* nocapture %minp) {
+define dso_local arm_aapcs_vfpcc i32 @minmaxval4(ptr nocapture readonly %x, ptr nocapture %minp) {
; CHECK-LABEL: minmaxval4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
@@ -31,10 +31,10 @@ vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, %entry ], [ %5, %vector.body ]
%vec.phi29 = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %entry ], [ %7, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 10)
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%2 = icmp sgt <4 x i32> %wide.masked.load, %vec.phi29
%3 = icmp slt <4 x i32> %wide.masked.load, %vec.phi
%4 = and <4 x i1> %active.lane.mask, %3
@@ -48,12 +48,12 @@ vector.body: ; preds = %vector.body, %entry
middle.block: ; preds = %vector.body
%9 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %7)
%10 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %5)
- store i32 %10, i32* %minp, align 4
+ store i32 %10, ptr %minp, align 4
ret i32 %9
}
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #1
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) #3
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) #3
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir
index 57cfaa8813734..9afdce1d92805 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve %s -run-pass=arm-low-overhead-loops -o - | FileCheck %s
--- |
- define dso_local void @variant_max_use(i16* nocapture readonly %a, i16* %c, i32 %N) #0 {
+ define dso_local void @variant_max_use(ptr nocapture readonly %a, ptr %c, i32 %N) #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -19,17 +19,17 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
- %lsr.iv.2 = phi i16* [ %scevgep.2, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv.2 = phi ptr [ %scevgep.2, %vector.body ], [ %c, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <8 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 8
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
%min = tail call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %wide.masked.load)
- store i16 %min, i16* %lsr.iv.2
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
- %scevgep.2 = getelementptr i16, i16* %lsr.iv.2, i32 1
+ store i16 %min, ptr %lsr.iv.2
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 8
+ %scevgep.2 = getelementptr i16, ptr %lsr.iv.2, i32 1
%tmp10 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp11 = icmp ne i32 %tmp10, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -39,7 +39,7 @@
ret void
}
- declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
+ declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll
index 888fbcc0ef106..6b5b6b2b1b677 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s
-define void @arm_cmplx_mag_squared_q15_mve(i16* %pSrc, i16* %pDst, i32 %blockSize) {
+define void @arm_cmplx_mag_squared_q15_mve(ptr %pSrc, ptr %pDst, i32 %blockSize) {
; CHECK-LABEL: arm_cmplx_mag_squared_q15_mve:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
@@ -24,20 +24,20 @@ entry:
do.body: ; preds = %do.body, %entry
%blockSize.addr.0 = phi i32 [ %blockSize, %entry ], [ %sub, %do.body ]
- %pDst.addr.0 = phi i16* [ %pDst, %entry ], [ %add.ptr7, %do.body ]
- %pSrc.addr.0 = phi i16* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
+ %pDst.addr.0 = phi ptr [ %pDst, %entry ], [ %add.ptr7, %do.body ]
+ %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ]
%0 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %blockSize.addr.0)
- %1 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0i16(i16* %pSrc.addr.0)
+ %1 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0(ptr %pSrc.addr.0)
%2 = extractvalue { <8 x i16>, <8 x i16> } %1, 0
%3 = extractvalue { <8 x i16>, <8 x i16> } %1, 1
%4 = tail call <8 x i16> @llvm.arm.mve.mulh.predicated.v8i16.v8i1(<8 x i16> %2, <8 x i16> %2, i32 0, <8 x i1> %0, <8 x i16> undef)
%5 = tail call <8 x i16> @llvm.arm.mve.mulh.predicated.v8i16.v8i1(<8 x i16> %3, <8 x i16> %3, i32 0, <8 x i1> %0, <8 x i16> undef)
%6 = tail call <8 x i16> @llvm.arm.mve.qadd.predicated.v8i16.v8i1(<8 x i16> %4, <8 x i16> %5, i32 0, <8 x i1> %0, <8 x i16> undef)
%7 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %6, i32 1, i32 0, <8 x i1> %0, <8 x i16> undef)
- %8 = bitcast i16* %pDst.addr.0 to <8 x i16>*
- tail call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %7, <8 x i16>* %8, i32 2, <8 x i1> %0)
- %add.ptr = getelementptr inbounds i16, i16* %pSrc.addr.0, i32 16
- %add.ptr7 = getelementptr inbounds i16, i16* %pDst.addr.0, i32 8
+ %8 = bitcast ptr %pDst.addr.0 to ptr
+ tail call void @llvm.masked.store.v8i16.p0(<8 x i16> %7, ptr %8, i32 2, <8 x i1> %0)
+ %add.ptr = getelementptr inbounds i16, ptr %pSrc.addr.0, i32 16
+ %add.ptr7 = getelementptr inbounds i16, ptr %pDst.addr.0, i32 8
%sub = add i32 %blockSize.addr.0, -8
%cmp = icmp sgt i32 %sub, 0
br i1 %cmp, label %do.body, label %do.end
@@ -46,7 +46,7 @@ do.end: ; preds = %do.body
ret void
}
-define i32 @bad(i32* readonly %x, i32* nocapture readonly %y, i32 %n) {
+define i32 @bad(ptr readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: bad:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
@@ -78,15 +78,15 @@ entry:
do.body: ; preds = %do.body, %entry
%s.0 = phi i32 [ 0, %entry ], [ %5, %do.body ]
%n.addr.0 = phi i32 [ %n, %entry ], [ %sub, %do.body ]
- %y.addr.0 = phi i32* [ %y, %entry ], [ %add.ptr1, %do.body ]
- %x.addr.0 = phi i32* [ %x, %entry ], [ %add.ptr, %do.body ]
+ %y.addr.0 = phi ptr [ %y, %entry ], [ %add.ptr1, %do.body ]
+ %x.addr.0 = phi ptr [ %x, %entry ], [ %add.ptr, %do.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.0)
- %1 = bitcast i32* %x.addr.0 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.0, i32 4
- %3 = bitcast i32* %y.addr.0 to <4 x i32>*
- %4 = load <4 x i32>, <4 x i32>* %3, align 4
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.0, i32 4
+ %1 = bitcast ptr %x.addr.0 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.0, i32 4
+ %3 = bitcast ptr %y.addr.0 to ptr
+ %4 = load <4 x i32>, ptr %3, align 4
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.0, i32 4
%5 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 %s.0, <4 x i32> %2, <4 x i32> %4)
%sub = add nsw i32 %n.addr.0, -4
%cmp = icmp sgt i32 %n.addr.0, 4
@@ -96,7 +96,7 @@ do.end: ; preds = %do.body
ret i32 %5
}
-define i32 @good(i32* readonly %x, i32* readonly %y, i32 %n) {
+define i32 @good(ptr readonly %x, ptr readonly %y, i32 %n) {
; CHECK-LABEL: good:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
@@ -117,15 +117,15 @@ entry:
do.body: ; preds = %do.body, %entry
%s.0 = phi i32 [ 0, %entry ], [ %5, %do.body ]
%n.addr.0 = phi i32 [ %n, %entry ], [ %sub, %do.body ]
- %y.addr.0 = phi i32* [ %y, %entry ], [ %add.ptr1, %do.body ]
- %x.addr.0 = phi i32* [ %x, %entry ], [ %add.ptr, %do.body ]
+ %y.addr.0 = phi ptr [ %y, %entry ], [ %add.ptr1, %do.body ]
+ %x.addr.0 = phi ptr [ %x, %entry ], [ %add.ptr, %do.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.0)
- %1 = bitcast i32* %x.addr.0 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.0, i32 4
- %3 = bitcast i32* %y.addr.0 to <4 x i32>*
- %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.0, i32 4
+ %1 = bitcast ptr %x.addr.0 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.0, i32 4
+ %3 = bitcast ptr %y.addr.0 to ptr
+ %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.0, i32 4
%5 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 %s.0, <4 x i32> %2, <4 x i32> %4)
%sub = add nsw i32 %n.addr.0, -4
%cmp = icmp sgt i32 %n.addr.0, 4
@@ -135,7 +135,7 @@ do.end: ; preds = %do.body
ret i32 %5
}
-define i32 @good2(i32* nocapture readonly %x, i32* nocapture readonly %y, i32 %n) {
+define i32 @good2(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: good2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
@@ -156,15 +156,15 @@ entry:
do.body: ; preds = %do.body, %entry
%s.0 = phi i32 [ 0, %entry ], [ %5, %do.body ]
%n.addr.0 = phi i32 [ %n, %entry ], [ %sub, %do.body ]
- %y.addr.0 = phi i32* [ %y, %entry ], [ %add.ptr1, %do.body ]
- %x.addr.0 = phi i32* [ %x, %entry ], [ %add.ptr, %do.body ]
+ %y.addr.0 = phi ptr [ %y, %entry ], [ %add.ptr1, %do.body ]
+ %x.addr.0 = phi ptr [ %x, %entry ], [ %add.ptr, %do.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.0)
- %1 = bitcast i32* %x.addr.0 to <4 x i32>*
- %2 = load <4 x i32>, <4 x i32>* %1, align 4
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.0, i32 4
- %3 = bitcast i32* %y.addr.0 to <4 x i32>*
- %4 = load <4 x i32>, <4 x i32>* %3, align 4
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.0, i32 4
+ %1 = bitcast ptr %x.addr.0 to ptr
+ %2 = load <4 x i32>, ptr %1, align 4
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.0, i32 4
+ %3 = bitcast ptr %y.addr.0 to ptr
+ %4 = load <4 x i32>, ptr %3, align 4
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.0, i32 4
%5 = tail call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 %s.0, <4 x i32> %2, <4 x i32> %4, <4 x i1> %0)
%sub = add nsw i32 %n.addr.0, -4
%cmp = icmp sgt i32 %n.addr.0, 4
@@ -175,12 +175,12 @@ do.end: ; preds = %do.body
}
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
-declare { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0i16(i16*)
+declare { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0(ptr)
declare <8 x i16> @llvm.arm.mve.mulh.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>, <8 x i16>)
declare <8 x i16> @llvm.arm.mve.qadd.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>, <8 x i16>)
declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
-declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
+declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>)
declare i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i1>)
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2
declare i32 @llvm.arm.mve.vmldava.v4i32(i32, i32, i32, i32, <4 x i32>, <4 x i32>) #1
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir
index 7e2eda863d5da..482a87ee6fb57 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir
@@ -2,20 +2,20 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc void @unrolled_and_vector(i8* nocapture %res, i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) {
+ define dso_local arm_aapcs_vfpcc void @unrolled_and_vector(ptr nocapture %res, ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) {
entry:
%cmp10 = icmp eq i32 %N, 0
br i1 %cmp10, label %for.cond.cleanup, label %vector.memcheck
vector.memcheck: ; preds = %entry
- %scevgep = getelementptr i8, i8* %res, i32 %N
- %scevgep12 = getelementptr i8, i8* %a, i32 %N
- %scevgep13 = getelementptr i8, i8* %b, i32 %N
- %bound0 = icmp ugt i8* %scevgep12, %res
- %bound1 = icmp ugt i8* %scevgep, %a
+ %scevgep = getelementptr i8, ptr %res, i32 %N
+ %scevgep12 = getelementptr i8, ptr %a, i32 %N
+ %scevgep13 = getelementptr i8, ptr %b, i32 %N
+ %bound0 = icmp ugt ptr %scevgep12, %res
+ %bound1 = icmp ugt ptr %scevgep, %a
%found.conflict = and i1 %bound0, %bound1
- %bound014 = icmp ugt i8* %scevgep13, %res
- %bound115 = icmp ugt i8* %scevgep, %b
+ %bound014 = icmp ugt ptr %scevgep13, %res
+ %bound115 = icmp ugt ptr %scevgep, %b
%found.conflict16 = and i1 %bound014, %bound115
%conflict.rdx = or i1 %found.conflict, %found.conflict16
%0 = add i32 %N, 15
@@ -45,23 +45,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv50 = phi i8* [ %scevgep51, %vector.body ], [ %res, %vector.ph ]
- %lsr.iv47 = phi i8* [ %scevgep48, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep45, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv50 = phi ptr [ %scevgep51, %vector.body ], [ %res, %vector.ph ]
+ %lsr.iv47 = phi ptr [ %scevgep48, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep45, %vector.body ], [ %a, %vector.ph ]
%12 = phi i32 [ %start2, %vector.ph ], [ %17, %vector.body ]
%13 = phi i32 [ %N, %vector.ph ], [ %15, %vector.body ]
- %lsr.iv5052 = bitcast i8* %lsr.iv50 to <16 x i8>*
- %lsr.iv4749 = bitcast i8* %lsr.iv47 to <16 x i8>*
- %lsr.iv46 = bitcast i8* %lsr.iv to <16 x i8>*
+ %lsr.iv5052 = bitcast ptr %lsr.iv50 to ptr
+ %lsr.iv4749 = bitcast ptr %lsr.iv47 to ptr
+ %lsr.iv46 = bitcast ptr %lsr.iv to ptr
%14 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %13)
%15 = sub i32 %13, 16
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv46, i32 1, <16 x i1> %14, <16 x i8> undef)
- %wide.masked.load19 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv4749, i32 1, <16 x i1> %14, <16 x i8> undef)
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv46, i32 1, <16 x i1> %14, <16 x i8> undef)
+ %wide.masked.load19 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv4749, i32 1, <16 x i1> %14, <16 x i8> undef)
%16 = add <16 x i8> %wide.masked.load19, %wide.masked.load
- call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %16, <16 x i8>* %lsr.iv5052, i32 1, <16 x i1> %14)
- %scevgep45 = getelementptr i8, i8* %lsr.iv, i32 16
- %scevgep48 = getelementptr i8, i8* %lsr.iv47, i32 16
- %scevgep51 = getelementptr i8, i8* %lsr.iv50, i32 16
+ call void @llvm.masked.store.v16i8.p0(<16 x i8> %16, ptr %lsr.iv5052, i32 1, <16 x i1> %14)
+ %scevgep45 = getelementptr i8, ptr %lsr.iv, i32 16
+ %scevgep48 = getelementptr i8, ptr %lsr.iv47, i32 16
+ %scevgep51 = getelementptr i8, ptr %lsr.iv50, i32 16
%17 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %12, i32 1)
%18 = icmp ne i32 %17, 0
br i1 %18, label %vector.body, label %for.cond.cleanup
@@ -72,13 +72,13 @@
br i1 %lcmp.mod, label %for.cond.cleanup, label %for.body.epil
for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa
- %arrayidx.epil = getelementptr inbounds i8, i8* %a, i32 %i.011.unr
- %19 = load i8, i8* %arrayidx.epil, align 1
- %arrayidx1.epil = getelementptr inbounds i8, i8* %b, i32 %i.011.unr
- %20 = load i8, i8* %arrayidx1.epil, align 1
+ %arrayidx.epil = getelementptr inbounds i8, ptr %a, i32 %i.011.unr
+ %19 = load i8, ptr %arrayidx.epil, align 1
+ %arrayidx1.epil = getelementptr inbounds i8, ptr %b, i32 %i.011.unr
+ %20 = load i8, ptr %arrayidx1.epil, align 1
%add.epil = add i8 %20, %19
- %arrayidx4.epil = getelementptr inbounds i8, i8* %res, i32 %i.011.unr
- store i8 %add.epil, i8* %arrayidx4.epil, align 1
+ %arrayidx4.epil = getelementptr inbounds i8, ptr %res, i32 %i.011.unr
+ store i8 %add.epil, ptr %arrayidx4.epil, align 1
%inc.epil = add nuw i32 %i.011.unr, 1
%epil.iter.cmp = icmp eq i32 %xtraiter, 1
br i1 %epil.iter.cmp, label %for.cond.cleanup, label %for.body.epil.1
@@ -89,76 +89,76 @@
for.body: ; preds = %for.body, %for.body.preheader.new
%i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
%21 = phi i32 [ %start1, %for.body.preheader.new ], [ %30, %for.body ]
- %scevgep23 = getelementptr i8, i8* %a, i32 %i.011
- %scevgep2453 = bitcast i8* %scevgep23 to i8*
- %22 = load i8, i8* %scevgep2453, align 1
- %scevgep27 = getelementptr i8, i8* %b, i32 %i.011
- %scevgep2854 = bitcast i8* %scevgep27 to i8*
- %23 = load i8, i8* %scevgep2854, align 1
+ %scevgep23 = getelementptr i8, ptr %a, i32 %i.011
+ %scevgep2453 = bitcast ptr %scevgep23 to ptr
+ %22 = load i8, ptr %scevgep2453, align 1
+ %scevgep27 = getelementptr i8, ptr %b, i32 %i.011
+ %scevgep2854 = bitcast ptr %scevgep27 to ptr
+ %23 = load i8, ptr %scevgep2854, align 1
%add = add i8 %23, %22
- %scevgep31 = getelementptr i8, i8* %res, i32 %i.011
- %scevgep3255 = bitcast i8* %scevgep31 to i8*
- store i8 %add, i8* %scevgep3255, align 1
- %scevgep39 = getelementptr i8, i8* %a, i32 %i.011
- %scevgep40 = getelementptr i8, i8* %scevgep39, i32 1
- %24 = load i8, i8* %scevgep40, align 1
- %scevgep41 = getelementptr i8, i8* %b, i32 %i.011
- %scevgep42 = getelementptr i8, i8* %scevgep41, i32 1
- %25 = load i8, i8* %scevgep42, align 1
+ %scevgep31 = getelementptr i8, ptr %res, i32 %i.011
+ %scevgep3255 = bitcast ptr %scevgep31 to ptr
+ store i8 %add, ptr %scevgep3255, align 1
+ %scevgep39 = getelementptr i8, ptr %a, i32 %i.011
+ %scevgep40 = getelementptr i8, ptr %scevgep39, i32 1
+ %24 = load i8, ptr %scevgep40, align 1
+ %scevgep41 = getelementptr i8, ptr %b, i32 %i.011
+ %scevgep42 = getelementptr i8, ptr %scevgep41, i32 1
+ %25 = load i8, ptr %scevgep42, align 1
%add.1 = add i8 %25, %24
- %scevgep43 = getelementptr i8, i8* %res, i32 %i.011
- %scevgep44 = getelementptr i8, i8* %scevgep43, i32 1
- store i8 %add.1, i8* %scevgep44, align 1
- %scevgep33 = getelementptr i8, i8* %a, i32 %i.011
- %scevgep34 = getelementptr i8, i8* %scevgep33, i32 2
- %26 = load i8, i8* %scevgep34, align 1
- %scevgep35 = getelementptr i8, i8* %b, i32 %i.011
- %scevgep36 = getelementptr i8, i8* %scevgep35, i32 2
- %27 = load i8, i8* %scevgep36, align 1
+ %scevgep43 = getelementptr i8, ptr %res, i32 %i.011
+ %scevgep44 = getelementptr i8, ptr %scevgep43, i32 1
+ store i8 %add.1, ptr %scevgep44, align 1
+ %scevgep33 = getelementptr i8, ptr %a, i32 %i.011
+ %scevgep34 = getelementptr i8, ptr %scevgep33, i32 2
+ %26 = load i8, ptr %scevgep34, align 1
+ %scevgep35 = getelementptr i8, ptr %b, i32 %i.011
+ %scevgep36 = getelementptr i8, ptr %scevgep35, i32 2
+ %27 = load i8, ptr %scevgep36, align 1
%add.2 = add i8 %27, %26
- %scevgep37 = getelementptr i8, i8* %res, i32 %i.011
- %scevgep38 = getelementptr i8, i8* %scevgep37, i32 2
- store i8 %add.2, i8* %scevgep38, align 1
- %scevgep21 = getelementptr i8, i8* %a, i32 %i.011
- %scevgep22 = getelementptr i8, i8* %scevgep21, i32 3
- %28 = load i8, i8* %scevgep22, align 1
- %scevgep25 = getelementptr i8, i8* %b, i32 %i.011
- %scevgep26 = getelementptr i8, i8* %scevgep25, i32 3
- %29 = load i8, i8* %scevgep26, align 1
+ %scevgep37 = getelementptr i8, ptr %res, i32 %i.011
+ %scevgep38 = getelementptr i8, ptr %scevgep37, i32 2
+ store i8 %add.2, ptr %scevgep38, align 1
+ %scevgep21 = getelementptr i8, ptr %a, i32 %i.011
+ %scevgep22 = getelementptr i8, ptr %scevgep21, i32 3
+ %28 = load i8, ptr %scevgep22, align 1
+ %scevgep25 = getelementptr i8, ptr %b, i32 %i.011
+ %scevgep26 = getelementptr i8, ptr %scevgep25, i32 3
+ %29 = load i8, ptr %scevgep26, align 1
%add.3 = add i8 %29, %28
- %scevgep29 = getelementptr i8, i8* %res, i32 %i.011
- %scevgep30 = getelementptr i8, i8* %scevgep29, i32 3
- store i8 %add.3, i8* %scevgep30, align 1
+ %scevgep29 = getelementptr i8, ptr %res, i32 %i.011
+ %scevgep30 = getelementptr i8, ptr %scevgep29, i32 3
+ store i8 %add.3, ptr %scevgep30, align 1
%inc.3 = add nuw i32 %i.011, 4
%30 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %21, i32 1)
%31 = icmp ne i32 %30, 0
br i1 %31, label %for.body, label %for.cond.cleanup.loopexit.unr-lcssa
for.body.epil.1: ; preds = %for.body.epil
- %arrayidx.epil.1 = getelementptr inbounds i8, i8* %a, i32 %inc.epil
- %32 = load i8, i8* %arrayidx.epil.1, align 1
- %arrayidx1.epil.1 = getelementptr inbounds i8, i8* %b, i32 %inc.epil
- %33 = load i8, i8* %arrayidx1.epil.1, align 1
+ %arrayidx.epil.1 = getelementptr inbounds i8, ptr %a, i32 %inc.epil
+ %32 = load i8, ptr %arrayidx.epil.1, align 1
+ %arrayidx1.epil.1 = getelementptr inbounds i8, ptr %b, i32 %inc.epil
+ %33 = load i8, ptr %arrayidx1.epil.1, align 1
%add.epil.1 = add i8 %33, %32
- %arrayidx4.epil.1 = getelementptr inbounds i8, i8* %res, i32 %inc.epil
- store i8 %add.epil.1, i8* %arrayidx4.epil.1, align 1
+ %arrayidx4.epil.1 = getelementptr inbounds i8, ptr %res, i32 %inc.epil
+ store i8 %add.epil.1, ptr %arrayidx4.epil.1, align 1
%inc.epil.1 = add nuw i32 %i.011.unr, 2
%epil.iter.cmp.1 = icmp eq i32 %xtraiter, 2
br i1 %epil.iter.cmp.1, label %for.cond.cleanup, label %for.body.epil.2
for.body.epil.2: ; preds = %for.body.epil.1
- %arrayidx.epil.2 = getelementptr inbounds i8, i8* %a, i32 %inc.epil.1
- %34 = load i8, i8* %arrayidx.epil.2, align 1
- %arrayidx1.epil.2 = getelementptr inbounds i8, i8* %b, i32 %inc.epil.1
- %35 = load i8, i8* %arrayidx1.epil.2, align 1
+ %arrayidx.epil.2 = getelementptr inbounds i8, ptr %a, i32 %inc.epil.1
+ %34 = load i8, ptr %arrayidx.epil.2, align 1
+ %arrayidx1.epil.2 = getelementptr inbounds i8, ptr %b, i32 %inc.epil.1
+ %35 = load i8, ptr %arrayidx1.epil.2, align 1
%add.epil.2 = add i8 %35, %34
- %arrayidx4.epil.2 = getelementptr inbounds i8, i8* %res, i32 %inc.epil.1
- store i8 %add.epil.2, i8* %arrayidx4.epil.2, align 1
+ %arrayidx4.epil.2 = getelementptr inbounds i8, ptr %res, i32 %inc.epil.1
+ store i8 %add.epil.2, ptr %arrayidx4.epil.2, align 1
br label %for.cond.cleanup
}
- declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #1
- declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) #2
+ declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) #1
+ declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <16 x i1> @llvm.arm.mve.vctp8(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
index 2d1c743d1025c..ec17e1e6721e8 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
@@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define dso_local void @legal_vaddv_s32(i16* nocapture readonly %a, i32* %c, i32 %N) {
+ define dso_local void @legal_vaddv_s32(ptr nocapture readonly %a, ptr %c, i32 %N) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -18,18 +18,18 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
- %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
%tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp10)
- store i32 %tmp11, i32* %store.addr
- %store.next = getelementptr i32, i32* %store.addr, i32 1
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
+ store i32 %tmp11, ptr %store.addr
+ %store.next = getelementptr i32, ptr %store.addr, i32 1
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -39,7 +39,7 @@
ret void
}
- define dso_local void @legal_vaddv_s16(i16* nocapture readonly %a, i32* %c, i32 %N) {
+ define dso_local void @legal_vaddv_s16(ptr nocapture readonly %a, ptr %c, i32 %N) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -56,18 +56,18 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
- %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <8 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 8
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
%sext = sext <8 x i16> %wide.masked.load to <8 x i32>
%tmp11 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %sext)
- store i32 %tmp11, i32* %store.addr
- %store.next = getelementptr i32, i32* %store.addr, i32 1
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
+ store i32 %tmp11, ptr %store.addr
+ %store.next = getelementptr i32, ptr %store.addr, i32 1
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 8
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -77,7 +77,7 @@
ret void
}
- define dso_local void @legal_vaddv_s8(i8* nocapture readonly %a, i32* %c, i32 %N) {
+ define dso_local void @legal_vaddv_s8(ptr nocapture readonly %a, ptr %c, i32 %N) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 7
@@ -94,18 +94,18 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
- %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i8* %lsr.iv to <16 x i8>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 16
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
%sext = sext <16 x i8> %wide.masked.load to <16 x i32>
%tmp11 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %sext)
- store i32 %tmp11, i32* %store.addr
- %store.next = getelementptr i32, i32* %store.addr, i32 1
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
+ store i32 %tmp11, ptr %store.addr
+ %store.next = getelementptr i32, ptr %store.addr, i32 1
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 16
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -115,7 +115,7 @@
ret void
}
- define dso_local i32 @legal_vaddva_s32(i16* nocapture readonly %a, i32 %N) {
+ define dso_local i32 @legal_vaddva_s32(ptr nocapture readonly %a, i32 %N) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -132,17 +132,17 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
%tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp10)
%acc.next = add i32 %tmp11, %acc
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -153,7 +153,7 @@
ret i32 %res
}
- define dso_local void @illegal_vaddv_s32(i16* nocapture readonly %a, i32* %c, i32 %N) {
+ define dso_local void @illegal_vaddv_s32(ptr nocapture readonly %a, ptr %c, i32 %N) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -170,19 +170,19 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
- %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
%not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
%tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
- store i32 %tmp11, i32* %store.addr
- %store.next = getelementptr i32, i32* %store.addr, i32 1
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
+ store i32 %tmp11, ptr %store.addr
+ %store.next = getelementptr i32, ptr %store.addr, i32 1
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -192,7 +192,7 @@
ret void
}
- define dso_local i32 @illegal_vaddva_s32(i16* nocapture readonly %a, i32 %N) {
+ define dso_local i32 @illegal_vaddva_s32(ptr nocapture readonly %a, i32 %N) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -209,18 +209,18 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
%not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
%tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
%acc.next = add i32 %tmp11, %acc
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -231,7 +231,7 @@
ret i32 %res
}
- define dso_local void @illegal_vaddv_u32(i16* nocapture readonly %a, i32* %c, i32 %N) {
+ define dso_local void @illegal_vaddv_u32(ptr nocapture readonly %a, ptr %c, i32 %N) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -248,19 +248,19 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
- %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = zext <4 x i16> %wide.masked.load to <4 x i32>
%not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
%tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
- store i32 %tmp11, i32* %store.addr
- %store.next = getelementptr i32, i32* %store.addr, i32 1
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
+ store i32 %tmp11, ptr %store.addr
+ %store.next = getelementptr i32, ptr %store.addr, i32 1
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -270,7 +270,7 @@
ret void
}
- define dso_local i32 @illegal_vaddva_u32(i16* nocapture readonly %a, i32 %N) {
+ define dso_local i32 @illegal_vaddva_u32(ptr nocapture readonly %a, i32 %N) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -287,18 +287,18 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = zext <4 x i16> %wide.masked.load to <4 x i32>
%not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
%tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
%acc.next = add i32 %tmp11, %acc
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -309,7 +309,7 @@
ret i32 %res
}
- define dso_local void @illegal_vaddv_s16(i8* nocapture readonly %a, i32* %c, i32 %N, <8 x i16> %pass) {
+ define dso_local void @illegal_vaddv_s16(ptr nocapture readonly %a, ptr %c, i32 %N, <8 x i16> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -326,20 +326,20 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
- %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i8* %lsr.iv to <8 x i8>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 8
- %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef)
+ %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef)
%sext.wide = sext <8 x i8> %wide.masked.load to <8 x i16>
%sub = sub <8 x i16> %sext.wide, %pass
%reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
%sext.reduce = sext i16 %reduce to i32
- store i32 %sext.reduce, i32* %store.addr
- %store.next = getelementptr i32, i32* %store.addr, i32 1
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 8
+ store i32 %sext.reduce, ptr %store.addr
+ %store.next = getelementptr i32, ptr %store.addr, i32 1
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 8
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -349,7 +349,7 @@
ret void
}
- define dso_local i32 @illegal_vaddva_s16(i8* nocapture readonly %a, i32 %N, <8 x i16> %pass) {
+ define dso_local i32 @illegal_vaddva_s16(ptr nocapture readonly %a, i32 %N, <8 x i16> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -366,19 +366,19 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
- %lsr.iv17 = bitcast i8* %lsr.iv to <8 x i8>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 8
- %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef)
+ %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef)
%sext.wide = sext <8 x i8> %wide.masked.load to <8 x i16>
%sub = sub <8 x i16> %sext.wide, %pass
%reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
%sext.reduce = sext i16 %reduce to i32
%acc.next = add i32 %sext.reduce, %acc
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 8
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 8
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -389,7 +389,7 @@
ret i32 %res
}
- define dso_local void @illegal_vaddv_u16(i16* nocapture readonly %a, i32* %c, i32 %N, <8 x i16> %pass) {
+ define dso_local void @illegal_vaddv_u16(ptr nocapture readonly %a, ptr %c, i32 %N, <8 x i16> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -406,19 +406,19 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
- %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <8 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 8
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
%sub = sub <8 x i16> %wide.masked.load, %pass
%reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
%zext.reduce = zext i16 %reduce to i32
- store i32 %zext.reduce, i32* %store.addr
- %store.next = getelementptr i32, i32* %store.addr, i32 1
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
+ store i32 %zext.reduce, ptr %store.addr
+ %store.next = getelementptr i32, ptr %store.addr, i32 1
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 8
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -428,7 +428,7 @@
ret void
}
- define dso_local i32 @illegal_vaddva_u16(i16* nocapture readonly %a, i32 %N, <8 x i16> %pass) {
+ define dso_local i32 @illegal_vaddva_u16(ptr nocapture readonly %a, i32 %N, <8 x i16> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -445,18 +445,18 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <8 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 8
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
%sub = sub <8 x i16> %wide.masked.load, %pass
%reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
%zext.reduce = zext i16 %reduce to i32
%acc.next = add i32 %zext.reduce, %acc
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 8
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -467,7 +467,7 @@
ret i32 %res
}
- define dso_local void @illegal_vaddv_s8(i8* nocapture readonly %a, i32* %c, i32 %N, <16 x i8> %pass) {
+ define dso_local void @illegal_vaddv_s8(ptr nocapture readonly %a, ptr %c, i32 %N, <16 x i8> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 7
@@ -484,19 +484,19 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
- %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i8* %lsr.iv to <16 x i8>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 16
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
%xor = xor <16 x i8> %wide.masked.load, %pass
%reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
%sext.reduce = sext i8 %reduce to i32
- store i32 %sext.reduce, i32* %store.addr
- %store.next = getelementptr i32, i32* %store.addr, i32 1
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
+ store i32 %sext.reduce, ptr %store.addr
+ %store.next = getelementptr i32, ptr %store.addr, i32 1
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 16
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -506,7 +506,7 @@
ret void
}
- define dso_local i32 @illegal_vaddva_s8(i8* nocapture readonly %a, i32 %N, <16 x i8> %pass) {
+ define dso_local i32 @illegal_vaddva_s8(ptr nocapture readonly %a, i32 %N, <16 x i8> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 7
@@ -523,18 +523,18 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
- %lsr.iv17 = bitcast i8* %lsr.iv to <16 x i8>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 16
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
%xor = xor <16 x i8> %wide.masked.load, %pass
%reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
%sext.reduce = sext i8 %reduce to i32
%acc.next = add i32 %sext.reduce, %acc
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 16
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -545,7 +545,7 @@
ret i32 %res
}
- define dso_local void @illegal_vaddv_u8(i8* nocapture readonly %a, i32* %c, i32 %N, <16 x i8> %pass) {
+ define dso_local void @illegal_vaddv_u8(ptr nocapture readonly %a, ptr %c, i32 %N, <16 x i8> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 7
@@ -562,19 +562,19 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
- %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i8* %lsr.iv to <16 x i8>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 16
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
%xor = xor <16 x i8> %wide.masked.load, %pass
%reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
%zext.reduce = zext i8 %reduce to i32
- store i32 %zext.reduce, i32* %store.addr
- %store.next = getelementptr i32, i32* %store.addr, i32 1
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
+ store i32 %zext.reduce, ptr %store.addr
+ %store.next = getelementptr i32, ptr %store.addr, i32 1
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 16
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -584,7 +584,7 @@
ret void
}
- define dso_local i32 @illegal_vaddva_u8(i8* nocapture readonly %a, i32 %N, <16 x i8> %pass) {
+ define dso_local i32 @illegal_vaddva_u8(ptr nocapture readonly %a, i32 %N, <16 x i8> %pass) {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 7
@@ -601,18 +601,18 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
- %lsr.iv17 = bitcast i8* %lsr.iv to <16 x i8>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
%tmp8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 16
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
%xor = xor <16 x i8> %wide.masked.load, %pass
%reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
%zext.reduce = zext i8 %reduce to i32
%acc.next = add i32 %zext.reduce, %acc
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 16
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -623,7 +623,7 @@
ret i32 %res
}
- define hidden i32 @regalloc_legality_vaddva_u32(i16* %x, i16* %y, i32 %n) {
+ define hidden i32 @regalloc_legality_vaddva_u32(ptr %x, ptr %y, i32 %n) {
entry:
%cmp22 = icmp sgt i32 %n, 0
%0 = add i32 %n, 3
@@ -639,23 +639,23 @@
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
- %x.addr.026 = phi i16* [ %add.ptr, %while.body ], [ %x, %while.body.preheader ]
- %y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
+ %x.addr.026 = phi ptr [ %add.ptr, %while.body ], [ %x, %while.body.preheader ]
+ %y.addr.025 = phi ptr [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
%n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ]
%acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ]
%5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ]
- %tmp3 = bitcast i16* %y.addr.025 to <4 x i16>*
- %tmp1 = bitcast i16* %x.addr.026 to <4 x i16>*
+ %tmp3 = bitcast ptr %y.addr.025 to ptr
+ %tmp1 = bitcast ptr %x.addr.026 to ptr
%tmp = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.023)
- %tmp2 = tail call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tmp1, i32 2, <4 x i1> %tmp, <4 x i16> zeroinitializer)
+ %tmp2 = tail call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %tmp1, i32 2, <4 x i1> %tmp, <4 x i16> zeroinitializer)
%zext.wide.1 = zext <4 x i16> %tmp2 to <4 x i32>
- %tmp4 = tail call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tmp3, i32 2, <4 x i1> %tmp, <4 x i16> zeroinitializer)
+ %tmp4 = tail call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %tmp3, i32 2, <4 x i1> %tmp, <4 x i16> zeroinitializer)
%zext.wide.2 = zext <4 x i16> %tmp4 to <4 x i32>
%or = or <4 x i32> %zext.wide.1, %zext.wide.2
%reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %or)
%acc.next = add i32 %reduce, %acc
- %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 4
- %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 4
+ %add.ptr = getelementptr inbounds i16, ptr %x.addr.026, i32 4
+ %add.ptr4 = getelementptr inbounds i16, ptr %y.addr.025, i32 4
%sub = add nsw i32 %n.addr.023, -4
%6 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %5, i32 1)
%7 = icmp ne i32 %6, 0
@@ -666,7 +666,7 @@
ret i32 %res
}
- define hidden i32 @regalloc_legality_vaddv_u16(i16* %x, i16* %y, i32 %n) {
+ define hidden i32 @regalloc_legality_vaddv_u16(ptr %x, ptr %y, i32 %n) {
entry:
%cmp22 = icmp sgt i32 %n, 0
%0 = add i32 %n, 7
@@ -682,22 +682,22 @@
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
- %x.addr.026 = phi i16* [ %add.ptr, %while.body ], [ %x, %while.body.preheader ]
- %y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
+ %x.addr.026 = phi ptr [ %add.ptr, %while.body ], [ %x, %while.body.preheader ]
+ %y.addr.025 = phi ptr [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
%n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ]
%acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ]
%5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ]
- %tmp3 = bitcast i16* %y.addr.025 to <8 x i16>*
- %tmp1 = bitcast i16* %x.addr.026 to <8 x i16>*
+ %tmp3 = bitcast ptr %y.addr.025 to ptr
+ %tmp1 = bitcast ptr %x.addr.026 to ptr
%tmp = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %n.addr.023)
- %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
- %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
+ %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
+ %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
%or = or <8 x i16> %tmp2, %tmp4
%reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %or)
%zext.reduce = zext i16 %reduce to i32
%acc.next = add i32 %zext.reduce, %acc
- %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8
- %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 8
+ %add.ptr = getelementptr inbounds i16, ptr %x.addr.026, i32 8
+ %add.ptr4 = getelementptr inbounds i16, ptr %y.addr.025, i32 8
%sub = add nsw i32 %n.addr.023, -8
%6 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %5, i32 1)
%7 = icmp ne i32 %6, 0
@@ -708,7 +708,7 @@
ret i32 %res
}
- define hidden i32 @regalloc_illegality_vaddva_s32(i16* %x, i16* %y, i16* %z, i32 %n) {
+ define hidden i32 @regalloc_illegality_vaddva_s32(ptr %x, ptr %y, ptr %z, i32 %n) {
entry:
%cmp22 = icmp sgt i32 %n, 0
%0 = add i32 %n, 7
@@ -724,23 +724,23 @@
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
- %x.addr.026 = phi i16* [ %add.ptr, %while.body ], [ %x, %while.body.preheader ]
- %y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
+ %x.addr.026 = phi ptr [ %add.ptr, %while.body ], [ %x, %while.body.preheader ]
+ %y.addr.025 = phi ptr [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
%n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ]
%acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ]
%5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ]
- %tmp3 = bitcast i16* %y.addr.025 to <8 x i16>*
- %tmp1 = bitcast i16* %x.addr.026 to <8 x i16>*
+ %tmp3 = bitcast ptr %y.addr.025 to ptr
+ %tmp1 = bitcast ptr %x.addr.026 to ptr
%tmp = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %n.addr.023)
- %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
- %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
+ %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
+ %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
%tmp5 = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 1)
%tmp6 = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 0)
%mul = add <4 x i32> %tmp5, %tmp6
%reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %mul)
%acc.next = add i32 %reduce, %acc
- %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8
- %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 8
+ %add.ptr = getelementptr inbounds i16, ptr %x.addr.026, i32 8
+ %add.ptr4 = getelementptr inbounds i16, ptr %y.addr.025, i32 8
%sub = add nsw i32 %n.addr.023, -8
%6 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %5, i32 1)
%7 = icmp ne i32 %6, 0
@@ -751,7 +751,7 @@
ret i32 %res
}
- define hidden i32 @illegal_vmull_non_zero(i16* %x, i16* %y, i16* %z, i32 %n) {
+ define hidden i32 @illegal_vmull_non_zero(ptr %x, ptr %y, ptr %z, i32 %n) {
entry:
%cmp22 = icmp sgt i32 %n, 0
%0 = add i32 %n, 7
@@ -767,21 +767,21 @@
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
- %x.addr.026 = phi i16* [ %add.ptr, %while.body ], [ %x, %while.body.preheader ]
- %y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
+ %x.addr.026 = phi ptr [ %add.ptr, %while.body ], [ %x, %while.body.preheader ]
+ %y.addr.025 = phi ptr [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
%n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ]
%acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ]
%5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ]
- %tmp3 = bitcast i16* %y.addr.025 to <8 x i16>*
- %tmp1 = bitcast i16* %x.addr.026 to <8 x i16>*
+ %tmp3 = bitcast ptr %y.addr.025 to ptr
+ %tmp1 = bitcast ptr %x.addr.026 to ptr
%tmp = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %n.addr.023)
- %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
- %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
+ %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
+ %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
%mul = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 1)
%reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %mul)
%acc.next = add i32 %reduce, %acc
- %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8
- %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 8
+ %add.ptr = getelementptr inbounds i16, ptr %x.addr.026, i32 8
+ %add.ptr4 = getelementptr inbounds i16, ptr %y.addr.025, i32 8
%sub = add nsw i32 %n.addr.023, -8
%6 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %5, i32 1)
%7 = icmp ne i32 %6, 0
@@ -792,12 +792,12 @@
ret i32 %res
}
- declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
- declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
- declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
- declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>)
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
+ declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>)
+ declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>)
+ declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll
index c8001df58e8cc..e0a61b1f9d956 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -tail-predication=force-enabled-no-reductions -o - %s | FileCheck %s
-define arm_aapcs_vfpcc <16 x i8> @vcmp_vpst_combination(<16 x i8>* %pSrc, i16 zeroext %blockSize, i8* nocapture %pResult, i32* nocapture %pIndex) {
+define arm_aapcs_vfpcc <16 x i8> @vcmp_vpst_combination(ptr %pSrc, i16 zeroext %blockSize, ptr nocapture %pResult, ptr nocapture %pIndex) {
; CHECK-LABEL: vcmp_vpst_combination:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -28,7 +28,7 @@ do.body: ; preds = %do.body, %entry
%curExtremValVec.0 = phi <16 x i8> [ <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, %entry ], [ %6, %do.body ]
%blkCnt.0 = phi i32 [ %conv, %entry ], [ %sub2, %do.body ]
%2 = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 %blkCnt.0)
- %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %pSrc, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
+ %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %pSrc, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
%4 = icmp sle <16 x i8> %3, %curExtremValVec.0
%5 = and <16 x i1> %4, %2
%6 = tail call <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8> %3, <16 x i8> %3, <16 x i1> %5, <16 x i8> %curExtremValVec.0)
@@ -41,7 +41,7 @@ do.end: ; preds = %do.body
ret <16 x i8> %6
}
-define i32 @vcmp_new_vpst_combination(i32 %len, i32* nocapture readonly %arr) {
+define i32 @vcmp_new_vpst_combination(i32 %len, ptr nocapture readonly %arr) {
; CHECK-LABEL: vcmp_new_vpst_combination:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -82,9 +82,9 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %vector.ph ], [ %5, %vector.body ]
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %len)
- %0 = getelementptr inbounds i32, i32* %arr, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %0 = getelementptr inbounds i32, ptr %arr, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%2 = icmp ne <4 x i32> %wide.masked.load, zeroinitializer
%narrow = and <4 x i1> %active.lane.mask, %2
%3 = zext <4 x i1> %narrow to <4 x i32>
@@ -101,7 +101,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
@@ -109,6 +109,6 @@ declare { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32, i32)
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
-declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>)
declare <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir
index 32ea68ab3312a..25f64484d503b 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir
@@ -5,7 +5,7 @@
# then used by the add in the exit - making the vctp operands equivalent.
--- |
- define dso_local i32 @wrong_vctp_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+ define dso_local i32 @wrong_vctp_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -22,22 +22,22 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
%6 = phi i32 [ %N, %vector.ph ], [ %8, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr
%7 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %6)
%8 = sub i32 %6, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %7, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %7, <4 x i16> undef)
%9 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %7, <4 x i16> undef)
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %7, <4 x i16> undef)
%10 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
%11 = mul nsw <4 x i32> %10, %9
%12 = add <4 x i32> %11, %vec.phi
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4
%13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%14 = icmp ne i32 %13, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -54,7 +54,7 @@
%res.0.lcssa = phi i32 [ 0, %entry ], [ %18, %middle.block ]
ret i32 %res.0.lcssa
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir
index 1fb505bbfc7c7..ad3e5fd59de13 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir
@@ -3,7 +3,7 @@
--- |
; Function Attrs: nofree norecurse nounwind
- define dso_local void @test(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) local_unnamed_addr #0 {
+ define dso_local void @test(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) local_unnamed_addr #0 {
bb:
%tmp = icmp eq i32 %arg2, 0
%tmp1 = add i32 %arg2, 3
@@ -21,32 +21,32 @@
br label %bb9
bb9: ; preds = %bb9, %bb3
- %lsr.iv2 = phi i32* [ %scevgep3, %bb9 ], [ %arg1, %bb3 ]
- %lsr.iv = phi i32* [ %scevgep, %bb9 ], [ %arg, %bb3 ]
+ %lsr.iv2 = phi ptr [ %scevgep3, %bb9 ], [ %arg1, %bb3 ]
+ %lsr.iv = phi ptr [ %scevgep, %bb9 ], [ %arg, %bb3 ]
%tmp7 = phi i32 [ %start, %bb3 ], [ %tmp12, %bb9 ]
%tmp8 = phi i32 [ %arg2, %bb3 ], [ %tmp11, %bb9 ]
- %lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>*
- %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
+ %lsr.iv24 = bitcast ptr %lsr.iv2 to ptr
+ %lsr.iv1 = bitcast ptr %lsr.iv to ptr
%vctp = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp8)
%and = and <4 x i1> %vctp, %invariant.mask
%tmp11 = sub i32 %tmp8, 4
- %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef)
+ %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef)
%tmp18 = icmp ne <4 x i32> %tmp17, zeroinitializer
%tmp20 = and <4 x i1> %tmp18, %vctp
- %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1, i32 4, <4 x i1> %tmp20, <4 x i32> undef)
+ %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1, i32 4, <4 x i1> %tmp20, <4 x i32> undef)
%tmp23 = mul nsw <4 x i32> %tmp22, %tmp17
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp23, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %tmp20)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp23, ptr %lsr.iv1, i32 4, <4 x i1> %tmp20)
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp7, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep3 = getelementptr i32, ptr %lsr.iv2, i32 4
br i1 %tmp13, label %bb9, label %bb27
bb27: ; preds = %bb9, %bb
ret void
}
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir
index 0615fce40b668..7af79712b9f0f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
--- |
; Function Attrs: nofree norecurse nounwind
- define dso_local void @test_vldr_p0(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) local_unnamed_addr #0 {
+ define dso_local void @test_vldr_p0(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) local_unnamed_addr #0 {
bb:
%tmp = icmp eq i32 %arg2, 0
%tmp1 = add i32 %arg2, 3
@@ -20,30 +20,30 @@
br label %bb9
bb9: ; preds = %bb9, %bb3
- %lsr.iv2 = phi i32* [ %scevgep3, %bb9 ], [ %arg1, %bb3 ]
- %lsr.iv = phi i32* [ %scevgep, %bb9 ], [ %arg, %bb3 ]
+ %lsr.iv2 = phi ptr [ %scevgep3, %bb9 ], [ %arg1, %bb3 ]
+ %lsr.iv = phi ptr [ %scevgep, %bb9 ], [ %arg, %bb3 ]
%tmp7 = phi i32 [ %start, %bb3 ], [ %tmp12, %bb9 ]
%tmp8 = phi i32 [ %arg2, %bb3 ], [ %tmp11, %bb9 ]
- %lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>*
- %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
+ %lsr.iv24 = bitcast ptr %lsr.iv2 to ptr
+ %lsr.iv1 = bitcast ptr %lsr.iv to ptr
%vctp = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp8)
%and = and <4 x i1> %vctp, %invariant.mask
%tmp11 = sub i32 %tmp8, 4
- %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef)
- %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1, i32 4, <4 x i1> %and, <4 x i32> undef)
+ %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef)
+ %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1, i32 4, <4 x i1> %and, <4 x i32> undef)
%tmp23 = mul nsw <4 x i32> %tmp22, %tmp17
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp23, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %and)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp23, ptr %lsr.iv1, i32 4, <4 x i1> %and)
%tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp7, i32 1)
%tmp13 = icmp ne i32 %tmp12, 0
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep3 = getelementptr i32, ptr %lsr.iv2, i32 4
br i1 %tmp13, label %bb9, label %bb27
bb27: ; preds = %bb9, %bb
ret void
}
- define dso_local void @test_vstr_p0(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) {
+ define dso_local void @test_vstr_p0(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) {
bb:
unreachable
bb3: ; preds = %bb
@@ -54,7 +54,7 @@
ret void
}
- define dso_local void @test_vmsr_p0(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) {
+ define dso_local void @test_vmsr_p0(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) {
bb:
unreachable
bb3: ; preds = %bb
@@ -65,7 +65,7 @@
ret void
}
- define dso_local void @test_vmrs_p0(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) {
+ define dso_local void @test_vmrs_p0(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) {
bb:
unreachable
bb3: ; preds = %bb
@@ -76,8 +76,8 @@
ret void
}
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #1
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #1
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir
index a9f4d7c1f8126..5153320d63674 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve,+lob -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
--- |
- define dso_local void @vctp_tsubi3(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+ define dso_local void @vctp_tsubi3(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
entry:
%cmp8 = icmp sgt i32 %N, 0
%0 = add i32 %N, 3
@@ -18,23 +18,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
- %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
- %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
+ %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
+ %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
- %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
- %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>*
+ %lsr.iv13 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr
+ %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 5
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
- %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
%10 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %8)
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4
- %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv1719, i32 4, <4 x i1> %8)
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4
+ %scevgep18 = getelementptr i32, ptr %lsr.iv17, i32 4
%11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %for.cond.cleanup
@@ -45,9 +45,9 @@
declare i32 @llvm.start.loop.iterations.i32(i32) #1
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #2
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
- declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #4
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3
+ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #4
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #3
...
---
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir
index d995f11b6c0e1..303d93652a287 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve,+lob -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
--- |
- define dso_local void @vctp_tsubi3(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+ define dso_local void @vctp_tsubi3(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
entry:
%cmp8 = icmp sgt i32 %N, 0
%0 = add i32 %N, 3
@@ -18,23 +18,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
- %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
- %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
+ %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
+ %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
- %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
- %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>*
+ %lsr.iv13 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr
+ %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 5
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
- %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
%10 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %8)
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4
- %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv1719, i32 4, <4 x i1> %8)
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4
+ %scevgep18 = getelementptr i32, ptr %lsr.iv17, i32 4
%11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %for.cond.cleanup
@@ -45,8 +45,8 @@
declare i32 @llvm.start.loop.iterations.i32(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
...
---
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir
index 48e161ded90fd..2516e5bf290bf 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve,+lob -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
--- |
- define dso_local void @vctp_tsubi3(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+ define dso_local void @vctp_tsubi3(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
entry:
%cmp8 = icmp sgt i32 %N, 0
%0 = add i32 %N, 3
@@ -18,23 +18,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
- %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
- %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
+ %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
+ %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
- %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
- %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
- %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>*
+ %lsr.iv13 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr
+ %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
%9 = sub i32 %7, 5
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
- %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef)
+ %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef)
%10 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %8)
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4
- %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv1719, i32 4, <4 x i1> %8)
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4
+ %scevgep18 = getelementptr i32, ptr %lsr.iv17, i32 4
%11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %for.cond.cleanup
@@ -45,8 +45,8 @@
declare i32 @llvm.start.loop.iterations.i32(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
...
---
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir
index 3b2e776e13dc7..4b015f81b8f32 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc signext i16 @wrong_liveout_shift(i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) {
+ define dso_local arm_aapcs_vfpcc signext i16 @wrong_liveout_shift(ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
entry:
%cmp11 = icmp eq i32 %N, 0
%0 = add i32 %N, 7
@@ -20,23 +20,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv20 = phi i8* [ %scevgep21, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv20 = phi ptr [ %scevgep21, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %b, %vector.ph ]
%vec.phi = phi <8 x i16> [ <i16 32767, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, %vector.ph ], [ %15, %vector.body ]
%8 = phi i32 [ %start, %vector.ph ], [ %16, %vector.body ]
%9 = phi i32 [ %N, %vector.ph ], [ %11, %vector.body ]
- %lsr.iv2022 = bitcast i8* %lsr.iv20 to <8 x i8>*
- %lsr.iv19 = bitcast i8* %lsr.iv to <8 x i8>*
+ %lsr.iv2022 = bitcast ptr %lsr.iv20 to ptr
+ %lsr.iv19 = bitcast ptr %lsr.iv to ptr
%10 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %9)
%11 = sub i32 %9, 8
- %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv19, i32 1, <8 x i1> %10, <8 x i8> undef)
+ %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv19, i32 1, <8 x i1> %10, <8 x i8> undef)
%12 = zext <8 x i8> %wide.masked.load to <8 x i16>
- %wide.masked.load16 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv2022, i32 1, <8 x i1> %10, <8 x i8> undef)
+ %wide.masked.load16 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv2022, i32 1, <8 x i1> %10, <8 x i8> undef)
%13 = zext <8 x i8> %wide.masked.load16 to <8 x i16>
%14 = mul nuw <8 x i16> %13, %12
%15 = sub <8 x i16> %vec.phi, %14
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 8
- %scevgep21 = getelementptr i8, i8* %lsr.iv20, i32 8
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 8
+ %scevgep21 = getelementptr i8, ptr %lsr.iv20, i32 8
%16 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %8, i32 1)
%17 = icmp ne i32 %16, 0
br i1 %17, label %vector.body, label %middle.block
@@ -53,7 +53,7 @@
%a.0.lcssa = phi i16 [ 32767, %entry ], [ %20, %middle.block ]
ret i16 %a.0.lcssa
}
- declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
+ declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
index 015af0b409777..0f1420068ab29 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s
-define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
+define dso_local i32 @mul_reduce_add(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) {
; CHECK-LABEL: mul_reduce_add:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -44,13 +44,13 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %6, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %a, i32 %index
+ %0 = getelementptr inbounds i32, ptr %a, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef)
- %3 = getelementptr inbounds i32, i32* %b, i32 %index
- %4 = bitcast i32* %3 to <4 x i32>*
- %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %4, i32 4, <4 x i1> %1, <4 x i32> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef)
+ %3 = getelementptr inbounds i32, ptr %b, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %4, i32 4, <4 x i1> %1, <4 x i32> undef)
%5 = mul nsw <4 x i32> %wide.masked.load13, %wide.masked.load
%6 = add nsw <4 x i32> %5, %vec.phi
%index.next = add i32 %index, 4
@@ -67,7 +67,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i32 %res.0.lcssa
}
-define dso_local i32 @mul_reduce_add_const(i32* noalias nocapture readonly %a, i32 %b, i32 %N) {
+define dso_local i32 @mul_reduce_add_const(ptr noalias nocapture readonly %a, i32 %b, i32 %N) {
; CHECK-LABEL: mul_reduce_add_const:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -108,10 +108,10 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %a, i32 %index
+ %0 = getelementptr inbounds i32, ptr %a, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef)
%3 = add nsw <4 x i32> %wide.masked.load, %vec.phi
%index.next = add i32 %index, 4
%4 = icmp eq i32 %index.next, %n.vec
@@ -127,7 +127,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i32 %res.0.lcssa
}
-define dso_local i32 @add_reduce_add_const(i32* noalias nocapture readonly %a, i32 %b, i32 %N) {
+define dso_local i32 @add_reduce_add_const(ptr noalias nocapture readonly %a, i32 %b, i32 %N) {
; CHECK-LABEL: add_reduce_add_const:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #0
@@ -168,10 +168,10 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %a, i32 %index
+ %0 = getelementptr inbounds i32, ptr %a, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef)
%3 = add nsw <4 x i32> %wide.masked.load, %vec.phi
%index.next = add i32 %index, 4
%4 = icmp eq i32 %index.next, %n.vec
@@ -187,7 +187,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
ret i32 %res.0.lcssa
}
-define dso_local void @vector_mul_const(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %c, i32 %N) {
+define dso_local void @vector_mul_const(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %c, i32 %N) {
; CHECK-LABEL: vector_mul_const:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
@@ -217,14 +217,14 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %b, i32 %index
+ %0 = getelementptr inbounds i32, ptr %b, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef)
%3 = mul nsw <4 x i32> %wide.masked.load, %broadcast.splat11
- %4 = getelementptr inbounds i32, i32* %a, i32 %index
- %5 = bitcast i32* %4 to <4 x i32>*
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %5, i32 4, <4 x i1> %1)
+ %4 = getelementptr inbounds i32, ptr %a, i32 %index
+ %5 = bitcast ptr %4 to ptr
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %5, i32 4, <4 x i1> %1)
%index.next = add i32 %index, 4
%6 = icmp eq i32 %index.next, %n.vec
br i1 %6, label %for.cond.cleanup, label %vector.body
@@ -233,7 +233,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
-define dso_local void @vector_add_const(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %c, i32 %N) {
+define dso_local void @vector_add_const(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %c, i32 %N) {
; CHECK-LABEL: vector_add_const:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
@@ -263,14 +263,14 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %b, i32 %index
+ %0 = getelementptr inbounds i32, ptr %b, i32 %index
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %2 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef)
%3 = add nsw <4 x i32> %wide.masked.load, %broadcast.splat11
- %4 = getelementptr inbounds i32, i32* %a, i32 %index
- %5 = bitcast i32* %4 to <4 x i32>*
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %5, i32 4, <4 x i1> %1)
+ %4 = getelementptr inbounds i32, ptr %a, i32 %index
+ %5 = bitcast ptr %4 to ptr
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %5, i32 4, <4 x i1> %1)
%index.next = add i32 %index, 4
%6 = icmp eq i32 %index.next, %n.vec
br i1 %6, label %for.cond.cleanup, label %vector.body
@@ -279,7 +279,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
-define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i8(i8* noalias nocapture %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c, i32 %N) {
+define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i8(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) {
; CHECK-LABEL: vector_mul_vector_i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
@@ -308,17 +308,17 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i8, i8* %b, i32 %index
+ %0 = getelementptr inbounds i8, ptr %b, i32 %index
%1 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N)
- %2 = bitcast i8* %0 to <16 x i8>*
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %2, i32 1, <16 x i1> %1, <16 x i8> undef)
- %3 = getelementptr inbounds i8, i8* %c, i32 %index
- %4 = bitcast i8* %3 to <16 x i8>*
- %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %4, i32 1, <16 x i1> %1, <16 x i8> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %2, i32 1, <16 x i1> %1, <16 x i8> undef)
+ %3 = getelementptr inbounds i8, ptr %c, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %4, i32 1, <16 x i1> %1, <16 x i8> undef)
%5 = mul <16 x i8> %wide.masked.load14, %wide.masked.load
- %6 = getelementptr inbounds i8, i8* %a, i32 %index
- %7 = bitcast i8* %6 to <16 x i8>*
- call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %5, <16 x i8>* %7, i32 1, <16 x i1> %1)
+ %6 = getelementptr inbounds i8, ptr %a, i32 %index
+ %7 = bitcast ptr %6 to ptr
+ call void @llvm.masked.store.v16i8.p0(<16 x i8> %5, ptr %7, i32 1, <16 x i1> %1)
%index.next = add i32 %index, 16
%8 = icmp eq i32 %index.next, %n.vec
br i1 %8, label %for.cond.cleanup, label %vector.body
@@ -328,7 +328,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
}
; Function Attrs: nofree norecurse nounwind
-define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i16(i16* noalias nocapture %a, i16* noalias nocapture readonly %b, i16* noalias nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
+define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
; CHECK-LABEL: vector_mul_vector_i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
@@ -357,17 +357,17 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i16, i16* %b, i32 %index
+ %0 = getelementptr inbounds i16, ptr %b, i32 %index
%1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N)
- %2 = bitcast i16* %0 to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %2, i32 2, <8 x i1> %1, <8 x i16> undef)
- %3 = getelementptr inbounds i16, i16* %c, i32 %index
- %4 = bitcast i16* %3 to <8 x i16>*
- %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %4, i32 2, <8 x i1> %1, <8 x i16> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %2, i32 2, <8 x i1> %1, <8 x i16> undef)
+ %3 = getelementptr inbounds i16, ptr %c, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %4, i32 2, <8 x i1> %1, <8 x i16> undef)
%5 = mul <8 x i16> %wide.masked.load14, %wide.masked.load
- %6 = getelementptr inbounds i16, i16* %a, i32 %index
- %7 = bitcast i16* %6 to <8 x i16>*
- call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %5, <8 x i16>* %7, i32 2, <8 x i1> %1)
+ %6 = getelementptr inbounds i16, ptr %a, i32 %index
+ %7 = bitcast ptr %6 to ptr
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> %5, ptr %7, i32 2, <8 x i1> %1)
%index.next = add i32 %index, 8
%8 = icmp eq i32 %index.next, %n.vec
br i1 %8, label %for.cond.cleanup, label %vector.body
@@ -376,12 +376,12 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
-declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
-declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
-declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
-declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
-declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>)
+declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>)
+declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir
index e6e6834fe0087..1f9cde8789a3f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
--- |
- define hidden i32 @arm_elementwise_mul_s8(i8* %input_1_vect, i8* %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, i8* %output, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 {
+ define hidden i32 @arm_elementwise_mul_s8(ptr %input_1_vect, ptr %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, ptr %output, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 {
entry:
%add = add i32 %block_size, 3
%div = lshr i32 %add, 2
@@ -20,22 +20,22 @@
ret i32 0
for.body: ; preds = %for.body, %for.body.lr.ph
- %input_1_vect.addr.052 = phi i8* [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ]
- %input_2_vect.addr.051 = phi i8* [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ]
- %output.addr.050 = phi i8* [ %output, %for.body.lr.ph ], [ %add.ptr15, %for.body ]
+ %input_1_vect.addr.052 = phi ptr [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ]
+ %input_2_vect.addr.051 = phi ptr [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ]
+ %output.addr.050 = phi ptr [ %output, %for.body.lr.ph ], [ %add.ptr15, %for.body ]
%num_elements.049 = phi i32 [ %block_size, %for.body.lr.ph ], [ %sub, %for.body ]
%iv = phi i32 [ %div, %for.body.lr.ph ], [ %iv.next, %for.body ]
- %output_cast = bitcast i8* %output.addr.050 to <4 x i32>*
- %input_2_cast = bitcast i8* %input_2_vect.addr.051 to <4 x i32>*
- %input_1_cast = bitcast i8* %input_1_vect.addr.052 to <4 x i32>*
+ %output_cast = bitcast ptr %output.addr.050 to ptr
+ %input_2_cast = bitcast ptr %input_2_vect.addr.051 to ptr
+ %input_1_cast = bitcast ptr %input_1_vect.addr.052 to ptr
%pred = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %num_elements.049)
- %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
+ %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
%insert.input_1_offset = insertelement <4 x i32> undef, i32 %input_1_offset, i32 0
%splat.input_1_offset = shufflevector <4 x i32> %insert.input_1_offset, <4 x i32> undef, <4 x i32> zeroinitializer
%insert.input_2_offset = insertelement <4 x i32> undef, i32 %input_2_offset, i32 0
%splat.input_2_offset = shufflevector <4 x i32> %insert.input_2_offset, <4 x i32> undef, <4 x i32> zeroinitializer
%add.1 = add <4 x i32> %load.1, %splat.input_1_offset
- %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
+ %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
%add.2 = add <4 x i32> %load.2, %splat.input_2_offset
%mul = mul <4 x i32> %add.1, %add.2
%insert.output = insertelement <4 x i32> undef, i32 %out_offset, i32 0
@@ -43,23 +43,23 @@
%add7 = add <4 x i32> %mul, %splat.output
%max = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %add7, <4 x i32> %.splat.i42, i32 1, <4 x i1> %pred, <4 x i32> undef)
%min = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %max, <4 x i32> %.splat.i, i32 1, <4 x i1> %pred, <4 x i32> undef)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %min, <4 x i32>* %output_cast, i32 4, <4 x i1> %pred)
- %add.ptr = getelementptr inbounds i8, i8* %input_1_vect.addr.052, i32 4
- %add.ptr14 = getelementptr inbounds i8, i8* %input_2_vect.addr.051, i32 4
- %add.ptr15 = getelementptr inbounds i8, i8* %output.addr.050, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %min, ptr %output_cast, i32 4, <4 x i1> %pred)
+ %add.ptr = getelementptr inbounds i8, ptr %input_1_vect.addr.052, i32 4
+ %add.ptr14 = getelementptr inbounds i8, ptr %input_2_vect.addr.051, i32 4
+ %add.ptr15 = getelementptr inbounds i8, ptr %output.addr.050, i32 4
%sub = add i32 %num_elements.049, -4
%iv.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %iv, i32 1)
%cmp = icmp ne i32 %iv.next, 0
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #3
declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1
declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1
declare i1 @llvm.test.set.loop.iterations.i32(i32) #4
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #4
- declare void @llvm.stackprotector(i8*, i8**) #5
+ declare void @llvm.stackprotector(ptr, ptr) #5
...
---
name: arm_elementwise_mul_s8
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir
index 082095f713f4f..4d3593a2c27cb 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
- define hidden i32 @vmldava_in_vpt(i8* %input_1_vect, i8* %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 {
+ define hidden i32 @vmldava_in_vpt(ptr %input_1_vect, ptr %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 {
entry:
%add = add i32 %block_size, 3
%div = lshr i32 %add, 2
@@ -24,20 +24,20 @@
for.body: ; preds = %for.body, %for.body.lr.ph
%lsr.iv = phi i32 [ %iv.next, %for.body ], [ %wls0, %for.body.lr.ph ]
- %input_1_vect.addr.052 = phi i8* [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ]
- %input_2_vect.addr.051 = phi i8* [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ]
+ %input_1_vect.addr.052 = phi ptr [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ]
+ %input_2_vect.addr.051 = phi ptr [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ]
%num_elements.049 = phi i32 [ %block_size, %for.body.lr.ph ], [ %sub, %for.body ]
%acc = phi i32 [ 0, %for.body.lr.ph ], [ %acc.next, %for.body ]
- %input_2_cast = bitcast i8* %input_2_vect.addr.051 to <4 x i32>*
- %input_1_cast = bitcast i8* %input_1_vect.addr.052 to <4 x i32>*
+ %input_2_cast = bitcast ptr %input_2_vect.addr.051 to ptr
+ %input_1_cast = bitcast ptr %input_1_vect.addr.052 to ptr
%pred = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %num_elements.049)
- %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
+ %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
%insert.input_1_offset = insertelement <4 x i32> undef, i32 %input_1_offset, i32 0
%splat.input_1_offset = shufflevector <4 x i32> %insert.input_1_offset, <4 x i32> undef, <4 x i32> zeroinitializer
%insert.input_2_offset = insertelement <4 x i32> undef, i32 %input_2_offset, i32 0
%splat.input_2_offset = shufflevector <4 x i32> %insert.input_2_offset, <4 x i32> undef, <4 x i32> zeroinitializer
%add.1 = add <4 x i32> %load.1, %splat.input_1_offset
- %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
+ %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef)
%add.2 = add <4 x i32> %load.2, %splat.input_2_offset
%mul = mul <4 x i32> %add.1, %add.2
%insert.output = insertelement <4 x i32> undef, i32 %out_offset, i32 0
@@ -46,16 +46,16 @@
%max = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %add7, <4 x i32> %.splat.i42, i32 1, <4 x i1> %pred, <4 x i32> undef)
%min = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %max, <4 x i32> %.splat.i, i32 1, <4 x i1> %pred, <4 x i32> undef)
%acc.next = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 %acc, <4 x i32> %min, <4 x i32> %max, <4 x i1> %pred)
- %add.ptr = getelementptr inbounds i8, i8* %input_1_vect.addr.052, i32 4
- %add.ptr14 = getelementptr inbounds i8, i8* %input_2_vect.addr.051, i32 4
+ %add.ptr = getelementptr inbounds i8, ptr %input_1_vect.addr.052, i32 4
+ %add.ptr14 = getelementptr inbounds i8, ptr %input_2_vect.addr.051, i32 4
%sub = add i32 %num_elements.049, -4
%iv.next = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1)
%cmp = icmp ne i32 %iv.next, 0
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #3
declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1
declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1
declare i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i1>) #1
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-block-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-block-debug.mir
index 9f2a7d9fb510b..34821c1d7e5cd 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-block-debug.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-block-debug.mir
@@ -7,37 +7,37 @@
%struct.arm_2d_size_t = type { i16, i16 }
- define void @__arm_2d_impl_rgb16_cl_key_1x1_paving_x_mirror_xx(i16* noalias %pSource, i16 signext %iSourceStride, i16* noalias %pTarget, i16 signext %iTargetStride, %struct.arm_2d_size_t* noalias nocapture readonly %ptSrcCopySize, %struct.arm_2d_size_t* noalias nocapture readonly %ptDstCopySize, i16 zeroext %Colour) local_unnamed_addr #0 !dbg !12 {
+ define void @__arm_2d_impl_rgb16_cl_key_1x1_paving_x_mirror_xx(ptr noalias %pSource, i16 signext %iSourceStride, ptr noalias %pTarget, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptSrcCopySize, ptr noalias nocapture readonly %ptDstCopySize, i16 zeroext %Colour) local_unnamed_addr #0 !dbg !12 {
entry:
- call void @llvm.dbg.value(metadata i16* %pSource, metadata !33, metadata !DIExpression()), !dbg !62
+ call void @llvm.dbg.value(metadata ptr %pSource, metadata !33, metadata !DIExpression()), !dbg !62
call void @llvm.dbg.value(metadata i16 %iSourceStride, metadata !34, metadata !DIExpression()), !dbg !62
- call void @llvm.dbg.value(metadata i16* %pTarget, metadata !35, metadata !DIExpression()), !dbg !62
+ call void @llvm.dbg.value(metadata ptr %pTarget, metadata !35, metadata !DIExpression()), !dbg !62
call void @llvm.dbg.value(metadata i16 %iTargetStride, metadata !36, metadata !DIExpression()), !dbg !62
- call void @llvm.dbg.value(metadata %struct.arm_2d_size_t* %ptSrcCopySize, metadata !37, metadata !DIExpression()), !dbg !62
- call void @llvm.dbg.value(metadata %struct.arm_2d_size_t* %ptDstCopySize, metadata !38, metadata !DIExpression()), !dbg !62
+ call void @llvm.dbg.value(metadata ptr %ptSrcCopySize, metadata !37, metadata !DIExpression()), !dbg !62
+ call void @llvm.dbg.value(metadata ptr %ptDstCopySize, metadata !38, metadata !DIExpression()), !dbg !62
call void @llvm.dbg.value(metadata i16 %Colour, metadata !39, metadata !DIExpression()), !dbg !62
call void @llvm.dbg.value(metadata i16 undef, metadata !40, metadata !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)), !dbg !62
call void @llvm.dbg.value(metadata i32 0, metadata !43, metadata !DIExpression()), !dbg !63
- %iHeight = getelementptr inbounds %struct.arm_2d_size_t, %struct.arm_2d_size_t* %ptDstCopySize, i32 0, i32 1
- %0 = load i16, i16* %iHeight, align 2, !tbaa !64
+ %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptDstCopySize, i32 0, i32 1
+ %0 = load i16, ptr %iHeight, align 2, !tbaa !64
%conv1 = sext i16 %0 to i32
%conv8 = sext i16 %iSourceStride to i32
%conv10 = sext i16 %iTargetStride to i32
- call void @llvm.dbg.value(metadata i16* %pSource, metadata !33, metadata !DIExpression()), !dbg !62
- call void @llvm.dbg.value(metadata i16* %pTarget, metadata !35, metadata !DIExpression()), !dbg !62
+ call void @llvm.dbg.value(metadata ptr %pSource, metadata !33, metadata !DIExpression()), !dbg !62
+ call void @llvm.dbg.value(metadata ptr %pTarget, metadata !35, metadata !DIExpression()), !dbg !62
call void @llvm.dbg.value(metadata i32 0, metadata !43, metadata !DIExpression()), !dbg !63
%cmp34 = icmp sgt i16 %0, 0, !dbg !69
br i1 %cmp34, label %for.body.lr.ph, label %for.cond.cleanup, !dbg !70
for.body.lr.ph: ; preds = %entry
- %iWidth2 = bitcast %struct.arm_2d_size_t* %ptSrcCopySize to i16*, !dbg !71
- %1 = load i16, i16* %iWidth2, align 2, !dbg !71, !tbaa !72
+ %iWidth2 = bitcast ptr %ptSrcCopySize to ptr, !dbg !71
+ %1 = load i16, ptr %iWidth2, align 2, !dbg !71, !tbaa !72
call void @llvm.dbg.value(metadata i16 %1, metadata !40, metadata !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)), !dbg !62
%conv = sext i16 %1 to i32, !dbg !73
call void @llvm.dbg.value(metadata i32 %conv, metadata !40, metadata !DIExpression()), !dbg !62
%sub = add nsw i32 %conv, -1
- %iWidth33 = bitcast %struct.arm_2d_size_t* %ptDstCopySize to i16*
- %2 = load i16, i16* %iWidth33, align 2, !tbaa !72
+ %iWidth33 = bitcast ptr %ptDstCopySize to ptr
+ %2 = load i16, ptr %iWidth33, align 2, !tbaa !72
%conv4 = sext i16 %2 to i32
%3 = tail call { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32 %sub, i32 1), !dbg !62
%4 = add i32 %conv4, 7, !dbg !70
@@ -51,14 +51,14 @@
ret void, !dbg !74
for.body: ; preds = %do.end, %for.body.lr.ph
- %pSource.addr.037 = phi i16* [ %pSource, %for.body.lr.ph ], [ %add.ptr9, %do.end ]
- %pTarget.addr.036 = phi i16* [ %pTarget, %for.body.lr.ph ], [ %add.ptr11, %do.end ]
+ %pSource.addr.037 = phi ptr [ %pSource, %for.body.lr.ph ], [ %add.ptr9, %do.end ]
+ %pTarget.addr.036 = phi ptr [ %pTarget, %for.body.lr.ph ], [ %add.ptr11, %do.end ]
%y.035 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %do.end ]
- call void @llvm.dbg.value(metadata i16* %pSource.addr.037, metadata !33, metadata !DIExpression()), !dbg !62
- call void @llvm.dbg.value(metadata i16* %pTarget.addr.036, metadata !35, metadata !DIExpression()), !dbg !62
+ call void @llvm.dbg.value(metadata ptr %pSource.addr.037, metadata !33, metadata !DIExpression()), !dbg !62
+ call void @llvm.dbg.value(metadata ptr %pTarget.addr.036, metadata !35, metadata !DIExpression()), !dbg !62
call void @llvm.dbg.value(metadata i32 %y.035, metadata !43, metadata !DIExpression()), !dbg !63
- call void @llvm.dbg.value(metadata i16* %pTarget.addr.036, metadata !46, metadata !DIExpression()), !dbg !75
- call void @llvm.dbg.value(metadata i16* %pSource.addr.037, metadata !49, metadata !DIExpression()), !dbg !75
+ call void @llvm.dbg.value(metadata ptr %pTarget.addr.036, metadata !46, metadata !DIExpression()), !dbg !75
+ call void @llvm.dbg.value(metadata ptr %pSource.addr.037, metadata !49, metadata !DIExpression()), !dbg !75
call void @llvm.dbg.value(metadata i32 %conv4, metadata !50, metadata !DIExpression()), !dbg !75
call void @llvm.dbg.value(metadata i32 undef, metadata !51, metadata !DIExpression()), !dbg !75
call void @llvm.dbg.value(metadata <8 x i16> undef, metadata !52, metadata !DIExpression()), !dbg !75
@@ -66,20 +66,20 @@
br label %do.body, !dbg !76
do.body: ; preds = %do.body, %for.body
- %pDst.0 = phi i16* [ %pTarget.addr.036, %for.body ], [ %add.ptr, %do.body ], !dbg !75
+ %pDst.0 = phi ptr [ %pTarget.addr.036, %for.body ], [ %add.ptr, %do.body ], !dbg !75
%dstWidth.0 = phi i32 [ %conv4, %for.body ], [ %sub5, %do.body ], !dbg !75
%.pn = phi { <8 x i16>, i32 } [ %3, %for.body ], [ %12, %do.body ]
%9 = phi i32 [ %8, %for.body ], [ %17, %do.body ], !dbg !75
- %pDst.01 = bitcast i16* %pDst.0 to <8 x i16>*, !dbg !75
+ %pDst.01 = bitcast ptr %pDst.0 to ptr, !dbg !75
%offset.0 = extractvalue { <8 x i16>, i32 } %.pn, 0, !dbg !75
%curOffsetIdx.0 = extractvalue { <8 x i16>, i32 } %.pn, 1, !dbg !75
call void @llvm.dbg.value(metadata <8 x i16> %offset.0, metadata !52, metadata !DIExpression()), !dbg !75
call void @llvm.dbg.value(metadata i32 %curOffsetIdx.0, metadata !51, metadata !DIExpression()), !dbg !75
call void @llvm.dbg.value(metadata i32 %dstWidth.0, metadata !50, metadata !DIExpression()), !dbg !75
- call void @llvm.dbg.value(metadata i16* %pDst.0, metadata !46, metadata !DIExpression()), !dbg !75
+ call void @llvm.dbg.value(metadata ptr %pDst.0, metadata !46, metadata !DIExpression()), !dbg !75
%10 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %dstWidth.0), !dbg !77
call void @llvm.dbg.value(metadata i32 undef, metadata !58, metadata !DIExpression()), !dbg !78
- %11 = tail call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %pSource.addr.037, <8 x i16> %offset.0, i32 16, i32 1, i32 1, <8 x i1> %10), !dbg !79
+ %11 = tail call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %pSource.addr.037, <8 x i16> %offset.0, i32 16, i32 1, i32 1, <8 x i1> %10), !dbg !79
call void @llvm.dbg.value(metadata <8 x i16> %11, metadata !61, metadata !DIExpression()), !dbg !78
%12 = tail call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> undef, i32 %curOffsetIdx.0, i32 1, <8 x i1> %10), !dbg !80
call void @llvm.dbg.value(metadata i32 undef, metadata !51, metadata !DIExpression()), !dbg !75
@@ -89,9 +89,9 @@
%15 = icmp ne <8 x i16> %11, %14, !dbg !81
%16 = and <8 x i1> %15, %10, !dbg !81
call void @llvm.dbg.value(metadata i32 undef, metadata !58, metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !78
- tail call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %11, <8 x i16>* %pDst.01, i32 2, <8 x i1> %16), !dbg !82
- %add.ptr = getelementptr inbounds i16, i16* %pDst.0, i32 8, !dbg !83
- call void @llvm.dbg.value(metadata i16* %add.ptr, metadata !46, metadata !DIExpression()), !dbg !75
+ tail call void @llvm.masked.store.v8i16.p0(<8 x i16> %11, ptr %pDst.01, i32 2, <8 x i1> %16), !dbg !82
+ %add.ptr = getelementptr inbounds i16, ptr %pDst.0, i32 8, !dbg !83
+ call void @llvm.dbg.value(metadata ptr %add.ptr, metadata !46, metadata !DIExpression()), !dbg !75
%sub5 = add nsw i32 %dstWidth.0, -8, !dbg !84
call void @llvm.dbg.value(metadata i32 %sub5, metadata !50, metadata !DIExpression()), !dbg !75
%17 = call i32 @llvm.loop.decrement.reg.i32(i32 %9, i32 1), !dbg !85
@@ -99,10 +99,10 @@
br i1 %18, label %do.body, label %do.end, !dbg !85, !llvm.loop !86
do.end: ; preds = %do.body
- %add.ptr9 = getelementptr inbounds i16, i16* %pSource.addr.037, i32 %conv8, !dbg !89
- call void @llvm.dbg.value(metadata i16* %add.ptr9, metadata !33, metadata !DIExpression()), !dbg !62
- %add.ptr11 = getelementptr inbounds i16, i16* %pTarget.addr.036, i32 %conv10, !dbg !90
- call void @llvm.dbg.value(metadata i16* %add.ptr11, metadata !35, metadata !DIExpression()), !dbg !62
+ %add.ptr9 = getelementptr inbounds i16, ptr %pSource.addr.037, i32 %conv8, !dbg !89
+ call void @llvm.dbg.value(metadata ptr %add.ptr9, metadata !33, metadata !DIExpression()), !dbg !62
+ %add.ptr11 = getelementptr inbounds i16, ptr %pTarget.addr.036, i32 %conv10, !dbg !90
+ call void @llvm.dbg.value(metadata ptr %add.ptr11, metadata !35, metadata !DIExpression()), !dbg !62
%inc = add nuw nsw i32 %y.035, 1, !dbg !91
call void @llvm.dbg.value(metadata i32 %inc, metadata !43, metadata !DIExpression()), !dbg !63
%exitcond.not = icmp eq i32 %inc, %conv1, !dbg !69
@@ -111,9 +111,9 @@
declare { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32, i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
- declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16*, <8 x i16>, i32, i32, i32, <8 x i1>)
+ declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr, <8 x i16>, i32, i32, i32, <8 x i1>)
declare { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>)
- declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
+ declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>)
declare void @llvm.dbg.value(metadata, metadata, metadata)
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-revert-placement.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-revert-placement.mir
index b6e5ca356951c..f1a40dd9f94cf 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-revert-placement.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-revert-placement.mir
@@ -48,8 +48,8 @@
for.body.lr.ph.split.us.split.us: ; preds = %for.body.lr.ph.split.us
%2 = sext i32 %cond11 to i64
%const19 = bitcast i32 1514690832 to i32
- store i64 %2, i64* @var_76, align 8
- store i8 %frombool, i8* @var_77, align 1
+ store i64 %2, ptr @var_76, align 8
+ store i8 %frombool, ptr @var_77, align 1
%3 = add i32 %i, %const19
%4 = add nsw i32 %k, -1
%xtraiter154 = and i32 %k, 3
@@ -61,19 +61,19 @@
br label %for.body.us.us
for.body.us.us: ; preds = %for.body.us.us, %for.body.lr.ph.split.us.split.us.new
- %lsr.iv = phi [12 x [12 x i8]]* [ %6, %for.body.us.us ], [ bitcast (i8* getelementptr inbounds ([12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 2, i32 0) to [12 x [12 x i8]]*), %for.body.lr.ph.split.us.split.us.new ]
+ %lsr.iv = phi ptr [ %6, %for.body.us.us ], [ getelementptr inbounds ([12 x [12 x i8]], ptr @arr_163, i32 0, i32 2, i32 0), %for.body.lr.ph.split.us.split.us.new ]
%ac.085.us.us = phi i32 [ 0, %for.body.lr.ph.split.us.split.us.new ], [ %add43.us.us.3, %for.body.us.us ]
- %lsr.iv3 = bitcast [12 x [12 x i8]]* %lsr.iv to i8*
- %scevgep6 = getelementptr i8, i8* %lsr.iv3, i32 -24
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep6, i8 %conv29, i32 %3, i1 false)
- %scevgep5 = getelementptr i8, i8* %lsr.iv3, i32 -12
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep5, i8 %conv29, i32 %3, i1 false)
- call void @llvm.memset.p0i8.i32(i8* align 1 %lsr.iv3, i8 %conv29, i32 %3, i1 false)
- %scevgep4 = getelementptr i8, i8* %lsr.iv3, i32 12
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep4, i8 %conv29, i32 %3, i1 false)
+ %lsr.iv3 = bitcast ptr %lsr.iv to ptr
+ %scevgep6 = getelementptr i8, ptr %lsr.iv3, i32 -24
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep6, i8 %conv29, i32 %3, i1 false)
+ %scevgep5 = getelementptr i8, ptr %lsr.iv3, i32 -12
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep5, i8 %conv29, i32 %3, i1 false)
+ call void @llvm.memset.p0.i32(ptr align 1 %lsr.iv3, i8 %conv29, i32 %3, i1 false)
+ %scevgep4 = getelementptr i8, ptr %lsr.iv3, i32 12
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep4, i8 %conv29, i32 %3, i1 false)
%add43.us.us.3 = add nuw i32 %ac.085.us.us, 4
- %scevgep2 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* %lsr.iv, i32 0, i32 4, i32 0
- %6 = bitcast i8* %scevgep2 to [12 x [12 x i8]]*
+ %scevgep2 = getelementptr [12 x [12 x i8]], ptr %lsr.iv, i32 0, i32 4, i32 0
+ %6 = bitcast ptr %scevgep2 to ptr
%niter159.ncmp.3 = icmp eq i32 %unroll_iter158, %add43.us.us.3
br i1 %niter159.ncmp.3, label %for.cond.for.cond45.preheader_crit_edge.loopexit135.unr-lcssa, label %for.body.us.us
@@ -102,36 +102,36 @@
br label %for.body.us.us115
for.body.us.us115: ; preds = %for.body.us.us115, %for.body.us.us115.preheader.new
- %lsr.iv7 = phi [12 x [12 x i8]]* [ %12, %for.body.us.us115 ], [ bitcast (i8* getelementptr inbounds ([12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 2, i32 0) to [12 x [12 x i8]]*), %for.body.us.us115.preheader.new ]
+ %lsr.iv7 = phi ptr [ %12, %for.body.us.us115 ], [ getelementptr inbounds ([12 x [12 x i8]], ptr @arr_163, i32 0, i32 2, i32 0), %for.body.us.us115.preheader.new ]
%ac.085.us.us116 = phi i32 [ 0, %for.body.us.us115.preheader.new ], [ %add43.us.us120.3, %for.body.us.us115 ]
- %lsr.iv79 = bitcast [12 x [12 x i8]]* %lsr.iv7 to i8*
- %scevgep12 = getelementptr i8, i8* %lsr.iv79, i32 -24
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep12, i8 %conv29, i32 %8, i1 false)
- %scevgep11 = getelementptr i8, i8* %lsr.iv79, i32 -12
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep11, i8 %conv29, i32 %8, i1 false)
- call void @llvm.memset.p0i8.i32(i8* align 1 %lsr.iv79, i8 %conv29, i32 %8, i1 false)
- %scevgep10 = getelementptr i8, i8* %lsr.iv79, i32 12
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep10, i8 %conv29, i32 %8, i1 false)
+ %lsr.iv79 = bitcast ptr %lsr.iv7 to ptr
+ %scevgep12 = getelementptr i8, ptr %lsr.iv79, i32 -24
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep12, i8 %conv29, i32 %8, i1 false)
+ %scevgep11 = getelementptr i8, ptr %lsr.iv79, i32 -12
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep11, i8 %conv29, i32 %8, i1 false)
+ call void @llvm.memset.p0.i32(ptr align 1 %lsr.iv79, i8 %conv29, i32 %8, i1 false)
+ %scevgep10 = getelementptr i8, ptr %lsr.iv79, i32 12
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep10, i8 %conv29, i32 %8, i1 false)
%add43.us.us120.3 = add nuw i32 %ac.085.us.us116, 4
- %scevgep8 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* %lsr.iv7, i32 0, i32 4, i32 0
- %12 = bitcast i8* %scevgep8 to [12 x [12 x i8]]*
+ %scevgep8 = getelementptr [12 x [12 x i8]], ptr %lsr.iv7, i32 0, i32 4, i32 0
+ %12 = bitcast ptr %scevgep8 to ptr
%niter153.ncmp.3 = icmp eq i32 %unroll_iter152, %add43.us.us120.3
br i1 %niter153.ncmp.3, label %for.cond.for.cond45.preheader_crit_edge.loopexit.unr-lcssa, label %for.body.us.us115
for.body.us: ; preds = %for.body.us, %for.body.us.preheader.new
- %lsr.iv13 = phi [12 x [12 x i8]]* [ %13, %for.body.us ], [ bitcast (i8* getelementptr inbounds ([12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 2, i32 0) to [12 x [12 x i8]]*), %for.body.us.preheader.new ]
+ %lsr.iv13 = phi ptr [ %13, %for.body.us ], [ getelementptr inbounds ([12 x [12 x i8]], ptr @arr_163, i32 0, i32 2, i32 0), %for.body.us.preheader.new ]
%ac.085.us = phi i32 [ 0, %for.body.us.preheader.new ], [ %add43.us.3, %for.body.us ]
- %lsr.iv1315 = bitcast [12 x [12 x i8]]* %lsr.iv13 to i8*
- %scevgep18 = getelementptr i8, i8* %lsr.iv1315, i32 -24
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep18, i8 %conv29, i32 %8, i1 false)
- %scevgep17 = getelementptr i8, i8* %lsr.iv1315, i32 -12
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep17, i8 %conv29, i32 %8, i1 false)
- call void @llvm.memset.p0i8.i32(i8* align 1 %lsr.iv1315, i8 %conv29, i32 %8, i1 false)
- %scevgep16 = getelementptr i8, i8* %lsr.iv1315, i32 12
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep16, i8 %conv29, i32 %8, i1 false)
+ %lsr.iv1315 = bitcast ptr %lsr.iv13 to ptr
+ %scevgep18 = getelementptr i8, ptr %lsr.iv1315, i32 -24
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep18, i8 %conv29, i32 %8, i1 false)
+ %scevgep17 = getelementptr i8, ptr %lsr.iv1315, i32 -12
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep17, i8 %conv29, i32 %8, i1 false)
+ call void @llvm.memset.p0.i32(ptr align 1 %lsr.iv1315, i8 %conv29, i32 %8, i1 false)
+ %scevgep16 = getelementptr i8, ptr %lsr.iv1315, i32 12
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep16, i8 %conv29, i32 %8, i1 false)
%add43.us.3 = add nuw i32 %ac.085.us, 4
- %scevgep14 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* %lsr.iv13, i32 0, i32 4, i32 0
- %13 = bitcast i8* %scevgep14 to [12 x [12 x i8]]*
+ %scevgep14 = getelementptr [12 x [12 x i8]], ptr %lsr.iv13, i32 0, i32 4, i32 0
+ %13 = bitcast ptr %scevgep14 to ptr
%niter.ncmp.3 = icmp eq i32 %unroll_iter, %add43.us.3
br i1 %niter.ncmp.3, label %for.cond.for.cond45.preheader_crit_edge.loopexit147.unr-lcssa, label %for.body.us
@@ -142,8 +142,8 @@
for.body.lr.ph.split.split.us: ; preds = %for.body.lr.ph.split
%15 = icmp eq i32 %m, 0
%16 = sext i32 %cond11 to i64
- store i64 %16, i64* @var_76, align 8
- store i8 %frombool, i8* @var_77, align 1
+ store i64 %16, ptr @var_76, align 8
+ store i8 %frombool, ptr @var_77, align 1
%spec.select = select i1 %15, i32 %lor.ext, i32 %conv36
br label %for.cond.for.cond45.preheader_crit_edge
@@ -158,8 +158,8 @@
br i1 %lcmp.mod157.not, label %for.cond.for.cond45.preheader_crit_edge.loopexit135, label %for.body.us.us.epil
for.body.us.us.epil: ; preds = %for.cond.for.cond45.preheader_crit_edge.loopexit135.unr-lcssa
- %scevgep140.epil = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %ac.085.us.us.unr, i32 0
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep140.epil, i8 %conv29, i32 %3, i1 false)
+ %scevgep140.epil = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %ac.085.us.us.unr, i32 0
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep140.epil, i8 %conv29, i32 %3, i1 false)
%epil.iter.cmp156.not = icmp eq i32 %xtraiter154, 1
br i1 %epil.iter.cmp156.not, label %for.cond.for.cond45.preheader_crit_edge.loopexit135, label %for.body.us.us.epil.1
@@ -174,8 +174,8 @@
br i1 %lcmp.mod151.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.us115.epil
for.body.us.us115.epil: ; preds = %for.cond.for.cond45.preheader_crit_edge.loopexit.unr-lcssa
- %scevgep138.epil = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %ac.085.us.us116.unr, i32 0
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep138.epil, i8 %conv29, i32 %8, i1 false)
+ %scevgep138.epil = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %ac.085.us.us116.unr, i32 0
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep138.epil, i8 %conv29, i32 %8, i1 false)
%epil.iter.cmp150.not = icmp eq i32 %xtraiter148, 1
br i1 %epil.iter.cmp150.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.us115.epil.1
@@ -185,14 +185,14 @@
br i1 %lcmp.mod.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.epil
for.body.us.epil: ; preds = %for.cond.for.cond45.preheader_crit_edge.loopexit147.unr-lcssa
- %scevgep.epil = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %ac.085.us.unr, i32 0
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.epil, i8 %conv29, i32 %8, i1 false)
+ %scevgep.epil = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %ac.085.us.unr, i32 0
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep.epil, i8 %conv29, i32 %8, i1 false)
%epil.iter.cmp.not = icmp eq i32 %xtraiter148, 1
br i1 %epil.iter.cmp.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.epil.1
for.cond.for.cond45.preheader_crit_edge: ; preds = %for.body.us.us115.epil.2, %for.body.us.us115.epil.1, %for.body.us.epil.2, %for.body.us.epil.1, %for.body.us.epil, %for.cond.for.cond45.preheader_crit_edge.loopexit147.unr-lcssa, %for.body.us.us115.epil, %for.cond.for.cond45.preheader_crit_edge.loopexit.unr-lcssa, %for.cond.for.cond45.preheader_crit_edge.loopexit135, %for.body.lr.ph.split.split, %for.body.lr.ph.split.split.us
%.us-phi = phi i32 [ %cond41.us.us, %for.cond.for.cond45.preheader_crit_edge.loopexit135 ], [ %spec.select, %for.body.lr.ph.split.split.us ], [ %spec.select143, %for.body.lr.ph.split.split ], [ %lor.ext, %for.body.us.us115.epil ], [ %lor.ext, %for.body.us.us115.epil.1 ], [ %lor.ext, %for.body.us.us115.epil.2 ], [ %lor.ext, %for.cond.for.cond45.preheader_crit_edge.loopexit.unr-lcssa ], [ %conv36, %for.body.us.epil ], [ %conv36, %for.body.us.epil.1 ], [ %conv36, %for.body.us.epil.2 ], [ %conv36, %for.cond.for.cond45.preheader_crit_edge.loopexit147.unr-lcssa ]
- store i32 %.us-phi, i32* @var_81, align 4
+ store i32 %.us-phi, ptr @var_81, align 4
br label %for.cond45.preheader
for.cond45.preheader: ; preds = %for.cond.for.cond45.preheader_crit_edge, %entry
@@ -216,69 +216,69 @@
%tobool47.not = icmp eq i32 %conv46, 0
%cond51 = select i1 %tobool47.not, i32 %l, i32 %h
%idxprom = and i32 %cond51, 255
- %arrayidx59 = getelementptr inbounds [22 x i8], [22 x i8]* @arr_239, i32 0, i32 %idxprom
- store i8 %conv58, i8* %arrayidx59, align 1
+ %arrayidx59 = getelementptr inbounds [22 x i8], ptr @arr_239, i32 0, i32 %idxprom
+ store i8 %conv58, ptr %arrayidx59, align 1
%conv46.1 = and i32 %cond51, 255
%tobool47.not.1 = icmp eq i32 %conv46.1, 0
%cond51.1 = select i1 %tobool47.not.1, i32 %l, i32 %h
%idxprom.1 = and i32 %cond51.1, 255
- %arrayidx59.1 = getelementptr inbounds [22 x i8], [22 x i8]* @arr_239, i32 0, i32 %idxprom.1
- store i8 %conv58, i8* %arrayidx59.1, align 1
+ %arrayidx59.1 = getelementptr inbounds [22 x i8], ptr @arr_239, i32 0, i32 %idxprom.1
+ store i8 %conv58, ptr %arrayidx59.1, align 1
%conv46.2 = and i32 %cond51.1, 255
%tobool47.not.2 = icmp eq i32 %conv46.2, 0
%cond51.2 = select i1 %tobool47.not.2, i32 %l, i32 %h
%idxprom.2 = and i32 %cond51.2, 255
- %arrayidx59.2 = getelementptr inbounds [22 x i8], [22 x i8]* @arr_239, i32 0, i32 %idxprom.2
- store i8 %conv58, i8* %arrayidx59.2, align 1
+ %arrayidx59.2 = getelementptr inbounds [22 x i8], ptr @arr_239, i32 0, i32 %idxprom.2
+ store i8 %conv58, ptr %arrayidx59.2, align 1
%conv46.3 = and i32 %cond51.2, 255
%tobool47.not.3 = icmp eq i32 %conv46.3, 0
%cond51.3 = select i1 %tobool47.not.3, i32 %l, i32 %h
%idxprom.3 = and i32 %cond51.3, 255
- %arrayidx59.3 = getelementptr inbounds [22 x i8], [22 x i8]* @arr_239, i32 0, i32 %idxprom.3
- store i8 %conv58, i8* %arrayidx59.3, align 1
+ %arrayidx59.3 = getelementptr inbounds [22 x i8], ptr @arr_239, i32 0, i32 %idxprom.3
+ store i8 %conv58, ptr %arrayidx59.3, align 1
br label %for.cond45
for.body.us.epil.1: ; preds = %for.body.us.epil
%add43.us.epil = add nuw nsw i32 %ac.085.us.unr, 1
- %scevgep.epil.1 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.epil, i32 0
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.epil.1, i8 %conv29, i32 %8, i1 false)
+ %scevgep.epil.1 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.epil, i32 0
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep.epil.1, i8 %conv29, i32 %8, i1 false)
%epil.iter.cmp.1.not = icmp eq i32 %xtraiter148, 2
br i1 %epil.iter.cmp.1.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.epil.2
for.body.us.epil.2: ; preds = %for.body.us.epil.1
%add43.us.epil.1 = add nuw nsw i32 %ac.085.us.unr, 2
- %scevgep.epil.2 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.epil.1, i32 0
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.epil.2, i8 %conv29, i32 %8, i1 false)
+ %scevgep.epil.2 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.epil.1, i32 0
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep.epil.2, i8 %conv29, i32 %8, i1 false)
br label %for.cond.for.cond45.preheader_crit_edge
for.body.us.us115.epil.1: ; preds = %for.body.us.us115.epil
%add43.us.us120.epil = add nuw nsw i32 %ac.085.us.us116.unr, 1
- %scevgep138.epil.1 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.us120.epil, i32 0
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep138.epil.1, i8 %conv29, i32 %8, i1 false)
+ %scevgep138.epil.1 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.us120.epil, i32 0
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep138.epil.1, i8 %conv29, i32 %8, i1 false)
%epil.iter.cmp150.1.not = icmp eq i32 %xtraiter148, 2
br i1 %epil.iter.cmp150.1.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.us115.epil.2
for.body.us.us115.epil.2: ; preds = %for.body.us.us115.epil.1
%add43.us.us120.epil.1 = add nuw nsw i32 %ac.085.us.us116.unr, 2
- %scevgep138.epil.2 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.us120.epil.1, i32 0
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep138.epil.2, i8 %conv29, i32 %8, i1 false)
+ %scevgep138.epil.2 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.us120.epil.1, i32 0
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep138.epil.2, i8 %conv29, i32 %8, i1 false)
br label %for.cond.for.cond45.preheader_crit_edge
for.body.us.us.epil.1: ; preds = %for.body.us.us.epil
%add43.us.us.epil = add nuw nsw i32 %ac.085.us.us.unr, 1
- %scevgep140.epil.1 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.us.epil, i32 0
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep140.epil.1, i8 %conv29, i32 %3, i1 false)
+ %scevgep140.epil.1 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.us.epil, i32 0
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep140.epil.1, i8 %conv29, i32 %3, i1 false)
%epil.iter.cmp156.1.not = icmp eq i32 %xtraiter154, 2
br i1 %epil.iter.cmp156.1.not, label %for.cond.for.cond45.preheader_crit_edge.loopexit135, label %for.body.us.us.epil.2
for.body.us.us.epil.2: ; preds = %for.body.us.us.epil.1
%add43.us.us.epil.1 = add nuw nsw i32 %ac.085.us.us.unr, 2
- %scevgep140.epil.2 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.us.epil.1, i32 0
- call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep140.epil.2, i8 %conv29, i32 %3, i1 false)
+ %scevgep140.epil.2 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.us.epil.1, i32 0
+ call void @llvm.memset.p0.i32(ptr align 1 %scevgep140.epil.2, i8 %conv29, i32 %3, i1 false)
br label %for.cond.for.cond45.preheader_crit_edge.loopexit135
}
- declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)
+ declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
...
---
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir
index b5998e3be0823..8076caa563a30 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc void @test_wlstp8(i8* noalias nocapture %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c, i32 %N) {
+ define dso_local arm_aapcs_vfpcc void @test_wlstp8(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) {
entry:
%0 = add i32 %N, 15
%1 = lshr i32 %0, 4
@@ -22,16 +22,16 @@
%5 = phi i32 [ %N, %vector.ph ], [ %7, %vector.body ]
%6 = call <16 x i1> @llvm.arm.vctp8(i32 %5)
%7 = sub i32 %5, 16
- %scevgep4 = getelementptr i8, i8* %b, i32 %index
- %scevgep45 = bitcast i8* %scevgep4 to <16 x i8>*
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %scevgep45, i32 1, <16 x i1> %6, <16 x i8> undef)
- %scevgep2 = getelementptr i8, i8* %c, i32 %index
- %scevgep23 = bitcast i8* %scevgep2 to <16 x i8>*
- %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %scevgep23, i32 1, <16 x i1> %6, <16 x i8> undef)
+ %scevgep4 = getelementptr i8, ptr %b, i32 %index
+ %scevgep45 = bitcast ptr %scevgep4 to ptr
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %scevgep45, i32 1, <16 x i1> %6, <16 x i8> undef)
+ %scevgep2 = getelementptr i8, ptr %c, i32 %index
+ %scevgep23 = bitcast ptr %scevgep2 to ptr
+ %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %scevgep23, i32 1, <16 x i1> %6, <16 x i8> undef)
%tmp5 = mul <16 x i8> %wide.masked.load14, %wide.masked.load
- %scevgep = getelementptr i8, i8* %a, i32 %index
- %scevgep1 = bitcast i8* %scevgep to <16 x i8>*
- call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %tmp5, <16 x i8>* %scevgep1, i32 1, <16 x i1> %6)
+ %scevgep = getelementptr i8, ptr %a, i32 %index
+ %scevgep1 = bitcast ptr %scevgep to ptr
+ call void @llvm.masked.store.v16i8.p0(<16 x i8> %tmp5, ptr %scevgep1, i32 1, <16 x i1> %6)
%index.next = add i32 %index, 16
%loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1)
%tmp8 = icmp eq i32 %loop.dec, 0
@@ -41,7 +41,7 @@
ret void
}
- define dso_local arm_aapcs_vfpcc void @test_wlstp16(i16* noalias nocapture %a, i16* noalias nocapture readonly %b, i16* noalias nocapture readonly %c, i32 %N) {
+ define dso_local arm_aapcs_vfpcc void @test_wlstp16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) {
entry:
%0 = add i32 %N, 7
%1 = lshr i32 %0, 3
@@ -56,32 +56,32 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv5 = phi i16* [ %scevgep6, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv2 = phi i16* [ %scevgep3, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv5 = phi ptr [ %scevgep6, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv2 = phi ptr [ %scevgep3, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%count = phi i32 [ %n.vec, %vector.ph ], [ %loop.dec, %vector.body ]
%5 = phi i32 [ %N, %vector.ph ], [ %7, %vector.body ]
- %lsr.iv57 = bitcast i16* %lsr.iv5 to <8 x i16>*
- %lsr.iv24 = bitcast i16* %lsr.iv2 to <8 x i16>*
- %lsr.iv1 = bitcast i16* %lsr.iv to <8 x i16>*
+ %lsr.iv57 = bitcast ptr %lsr.iv5 to ptr
+ %lsr.iv24 = bitcast ptr %lsr.iv2 to ptr
+ %lsr.iv1 = bitcast ptr %lsr.iv to ptr
%6 = call <8 x i1> @llvm.arm.vctp16(i32 %5)
%7 = sub i32 %5, 8
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv57, i32 2, <8 x i1> %6, <8 x i16> undef)
- %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv24, i32 2, <8 x i1> %6, <8 x i16> undef)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv57, i32 2, <8 x i1> %6, <8 x i16> undef)
+ %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv24, i32 2, <8 x i1> %6, <8 x i16> undef)
%tmp5 = mul <8 x i16> %wide.masked.load14, %wide.masked.load
- call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %tmp5, <8 x i16>* %lsr.iv1, i32 2, <8 x i1> %6)
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> %tmp5, ptr %lsr.iv1, i32 2, <8 x i1> %6)
%loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1)
%tmp8 = icmp eq i32 %loop.dec, 0
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
- %scevgep3 = getelementptr i16, i16* %lsr.iv2, i32 8
- %scevgep6 = getelementptr i16, i16* %lsr.iv5, i32 8
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 8
+ %scevgep3 = getelementptr i16, ptr %lsr.iv2, i32 8
+ %scevgep6 = getelementptr i16, ptr %lsr.iv5, i32 8
br i1 %tmp8, label %for.cond.cleanup, label %vector.body
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
- define dso_local i32 @test_wlstp32(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
+ define dso_local i32 @test_wlstp32(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) {
entry:
%0 = add i32 %N, 3
%1 = lshr i32 %0, 2
@@ -96,23 +96,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv2 = phi i32* [ %scevgep3, %vector.body ], [ %a, %vector.ph ]
- %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv2 = phi ptr [ %scevgep3, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %b, %vector.ph ]
%count = phi i32 [ %n.vec, %vector.ph ], [ %loop.dec, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp6, %vector.body ]
%5 = phi i32 [ %N, %vector.ph ], [ %7, %vector.body ]
- %lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>*
- %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
+ %lsr.iv24 = bitcast ptr %lsr.iv2 to ptr
+ %lsr.iv1 = bitcast ptr %lsr.iv to ptr
%6 = call <4 x i1> @llvm.arm.vctp32(i32 %5)
%7 = sub i32 %5, 4
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv24, i32 4, <4 x i1> %6, <4 x i32> undef)
- %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1, i32 4, <4 x i1> %6, <4 x i32> undef)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv24, i32 4, <4 x i1> %6, <4 x i32> undef)
+ %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1, i32 4, <4 x i1> %6, <4 x i32> undef)
%tmp5 = mul nsw <4 x i32> %wide.masked.load13, %wide.masked.load
%tmp6 = add nsw <4 x i32> %tmp5, %vec.phi
%loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1)
%tmp7 = icmp eq i32 %loop.dec, 0
- %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
- %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 4
+ %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
+ %scevgep3 = getelementptr i32, ptr %lsr.iv2, i32 4
br i1 %tmp7, label %middle.block, label %vector.body
middle.block: ; preds = %vector.body
@@ -128,15 +128,15 @@
declare i1 @llvm.test.set.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
- declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
- declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
- declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
- declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
- declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>)
+ declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>)
+ declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare <16 x i1> @llvm.arm.vctp8(i32)
- declare void @llvm.stackprotector(i8*, i8**)
+ declare void @llvm.stackprotector(ptr, ptr)
declare <8 x i1> @llvm.arm.vctp16(i32)
declare <4 x i1> @llvm.arm.vctp32(i32)
...
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir
index 6d4c6444dd778..3e1fd796d6ed0 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
- define dso_local arm_aapcs_vfpcc signext i16 @wrong_liveout_shift(i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) {
+ define dso_local arm_aapcs_vfpcc signext i16 @wrong_liveout_shift(ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
entry:
%cmp11 = icmp eq i32 %N, 0
%0 = add i32 %N, 7
@@ -20,23 +20,23 @@
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv20 = phi i8* [ %scevgep21, %vector.body ], [ %c, %vector.ph ]
- %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv20 = phi ptr [ %scevgep21, %vector.body ], [ %c, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %b, %vector.ph ]
%vec.phi = phi <8 x i16> [ <i16 32767, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, %vector.ph ], [ %15, %vector.body ]
%8 = phi i32 [ %start, %vector.ph ], [ %16, %vector.body ]
%9 = phi i32 [ %N, %vector.ph ], [ %11, %vector.body ]
- %lsr.iv2022 = bitcast i8* %lsr.iv20 to <8 x i8>*
- %lsr.iv19 = bitcast i8* %lsr.iv to <8 x i8>*
+ %lsr.iv2022 = bitcast ptr %lsr.iv20 to ptr
+ %lsr.iv19 = bitcast ptr %lsr.iv to ptr
%10 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %9)
%11 = sub i32 %9, 8
- %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv19, i32 1, <8 x i1> %10, <8 x i8> undef)
+ %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv19, i32 1, <8 x i1> %10, <8 x i8> undef)
%12 = zext <8 x i8> %wide.masked.load to <8 x i16>
- %wide.masked.load16 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv2022, i32 1, <8 x i1> %10, <8 x i8> undef)
+ %wide.masked.load16 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv2022, i32 1, <8 x i1> %10, <8 x i8> undef)
%13 = zext <8 x i8> %wide.masked.load16 to <8 x i16>
%14 = mul nuw <8 x i16> %13, %12
%15 = sub <8 x i16> %vec.phi, %14
- %scevgep = getelementptr i8, i8* %lsr.iv, i32 8
- %scevgep21 = getelementptr i8, i8* %lsr.iv20, i32 8
+ %scevgep = getelementptr i8, ptr %lsr.iv, i32 8
+ %scevgep21 = getelementptr i8, ptr %lsr.iv20, i32 8
%16 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %8, i32 1)
%17 = icmp ne i32 %16, 0
br i1 %17, label %vector.body, label %middle.block
@@ -53,7 +53,7 @@
%a.0.lcssa = phi i16 [ 32767, %entry ], [ %20, %middle.block ]
ret i16 %a.0.lcssa
}
- declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
+ declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir
index 76b08a6418810..2aab1ed916e76 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir
@@ -5,7 +5,7 @@
# is too complex to process for now.
--- |
- define dso_local i32 @wrong_vctp_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+ define dso_local i32 @wrong_vctp_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%tmp = add i32 %N, 3
@@ -22,22 +22,22 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr
%tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7)
%tmp9 = sub i32 %tmp7, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
%tmp12 = mul nsw <4 x i32> %tmp11, %tmp10
%tmp13 = add <4 x i32> %tmp12, %vec.phi
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4
%tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp15 = icmp ne i32 %tmp14, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -59,7 +59,7 @@
%res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp18, %middle.block ]
ret i32 %res.0.lcssa
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir
index ae8870034e91e..b0a0ccbd66e12 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir
@@ -4,7 +4,7 @@
# The VCTP uses r2, which is redefined in the loop.
--- |
- define dso_local i32 @wrong_vctp_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+ define dso_local i32 @wrong_vctp_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
entry:
%cmp9 = icmp eq i32 %N, 0
%0 = add i32 %N, 3
@@ -21,22 +21,22 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
- %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
+ %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
%6 = phi i32 [ %N, %vector.ph ], [ %8, %vector.body ]
- %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
- %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>*
+ %lsr.iv17 = bitcast ptr %lsr.iv to ptr
+ %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr
%7 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %6)
%8 = sub i32 %6, 4
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %7, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %7, <4 x i16> undef)
%9 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %7, <4 x i16> undef)
+ %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %7, <4 x i16> undef)
%10 = sext <4 x i16> %wide.masked.load14 to <4 x i32>
%11 = mul nsw <4 x i32> %10, %9
%12 = add <4 x i32> %11, %vec.phi
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4
%13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%14 = icmp ne i32 %13, 0
%lsr.iv.next = add nsw i32 %lsr.iv1, -1
@@ -52,7 +52,7 @@
%res.0.lcssa = phi i32 [ 0, %entry ], [ %17, %middle.block ]
ret i32 %res.0.lcssa
}
- declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
+ declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
index cdfd2c845b55a..b91800c48cec4 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
@@ -3,7 +3,7 @@
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-define arm_aapcs_vfpcc void @push_out_add_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) {
+define arm_aapcs_vfpcc void @push_out_add_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
; CHECK-LABEL: @push_out_add_sub_block(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 6, i32 6, i32 6, i32 6>
@@ -45,11 +45,11 @@ vector.body: ; preds = %vector.body, %vecto
lower.block: ; preds = %vector.body
%1 = add <4 x i32> %vec.ind, <i32 6, i32 6, i32 6, i32 6>
- %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %3 = getelementptr inbounds i32, i32* %dst, i32 %index
- %4 = bitcast i32* %3 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4
+ %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %3 = getelementptr inbounds i32, ptr %dst, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %4, align 4
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
br label %vector.body.end
@@ -62,7 +62,7 @@ end:
ret void;
}
-define arm_aapcs_vfpcc void @push_out_add_sub_block_commutedphi(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) {
+define arm_aapcs_vfpcc void @push_out_add_sub_block_commutedphi(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
; CHECK-LABEL: @push_out_add_sub_block_commutedphi(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 6, i32 6, i32 6, i32 6>
@@ -104,11 +104,11 @@ vector.body: ; preds = %vector.body, %vecto
lower.block: ; preds = %vector.body
%1 = add <4 x i32> %vec.ind, <i32 6, i32 6, i32 6, i32 6>
- %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %3 = getelementptr inbounds i32, i32* %dst, i32 %index
- %4 = bitcast i32* %3 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4
+ %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %3 = getelementptr inbounds i32, ptr %dst, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %4, align 4
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
br label %vector.body.end
@@ -121,7 +121,7 @@ end:
ret void;
}
-define arm_aapcs_vfpcc void @push_out_mul_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) {
+define arm_aapcs_vfpcc void @push_out_mul_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
; CHECK-LABEL: @push_out_mul_sub_block(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[PUSHEDOUTMUL:%.*]] = mul <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 3, i32 3, i32 3, i32 3>
@@ -166,11 +166,11 @@ vector.body: ; preds = %vector.body, %vecto
lower.block: ; preds = %vector.body
%1 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
%2 = add <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6>
- %3 = getelementptr inbounds i32, i32* %data, <4 x i32> %2
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %4 = getelementptr inbounds i32, i32* %dst, i32 %index
- %5 = bitcast i32* %4 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %5, align 4
+ %3 = getelementptr inbounds i32, ptr %data, <4 x i32> %2
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %4 = getelementptr inbounds i32, ptr %dst, i32 %index
+ %5 = bitcast ptr %4 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %5, align 4
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
br label %vector.body.end
@@ -184,7 +184,7 @@ end:
}
-define arm_aapcs_vfpcc void @push_out_mul_sub_loop(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) {
+define arm_aapcs_vfpcc void @push_out_mul_sub_loop(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
; CHECK-LABEL: @push_out_mul_sub_loop(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -232,11 +232,11 @@ vector.2.ph:
vector.2.body: ; preds = %vector.body
%0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
%1 = add <4 x i32> %0, <i32 6, i32 6, i32 6, i32 6>
- %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %3 = getelementptr inbounds i32, i32* %dst, i32 %index
- %4 = bitcast i32* %3 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4
+ %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %3 = getelementptr inbounds i32, ptr %dst, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %4, align 4
br label %vector.2.body.end
vector.2.body.end: ; preds = %lower.block
@@ -254,7 +254,7 @@ end:
ret void;
}
-define arm_aapcs_vfpcc void @invariant_add(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) {
+define arm_aapcs_vfpcc void @invariant_add(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
; CHECK-LABEL: @invariant_add(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -282,11 +282,11 @@ vector.body: ; preds = %vector.body, %vecto
%vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%l0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
%l1 = add <4 x i32> %l0, %vec.ind
- %l2 = getelementptr inbounds i32, i32* %data, <4 x i32> %l1
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %l2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %l3 = getelementptr inbounds i32, i32* %dst, i32 %index
- %l4 = bitcast i32* %l3 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %l4, align 4
+ %l2 = getelementptr inbounds i32, ptr %data, <4 x i32> %l1
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %l2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %l3 = getelementptr inbounds i32, ptr %dst, i32 %index
+ %l4 = bitcast ptr %l3 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %l4, align 4
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
%l5 = icmp eq i32 %index.next, %n.vec
@@ -296,7 +296,7 @@ end:
ret void;
}
-define void @gatherload(i32 %n, i32 %m, i32* nocapture %a, i32* nocapture readonly %b, i32 %call.us.us) {
+define void @gatherload(i32 %n, i32 %m, ptr nocapture %a, ptr nocapture readonly %b, i32 %call.us.us) {
; CHECK-LABEL: @gatherload(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -360,7 +360,7 @@ define void @gatherload(i32 %n, i32 %m, i32* nocapture %a, i32* nocapture readon
; CHECK-NEXT: ret void
;
entry:
- %a57 = bitcast i32* %a to i8*
+ %a57 = bitcast ptr %a to ptr
%cmp38 = icmp sgt i32 %n, 0
br i1 %cmp38, label %for.body.lr.ph, label %for.end16
@@ -370,11 +370,11 @@ for.body.lr.ph: ; preds = %entry
for.body.us.us.preheader: ; preds = %for.body.lr.ph
%0 = shl nuw i32 %m, 2
- %scevgep = getelementptr i32, i32* %a, i32 %m
- %scevgep64 = getelementptr i32, i32* %b, i32 %m
+ %scevgep = getelementptr i32, ptr %a, i32 %m
+ %scevgep64 = getelementptr i32, ptr %b, i32 %m
%min.iters.check = icmp ult i32 %m, 4
- %bound0 = icmp ugt i32* %scevgep64, %a
- %bound1 = icmp ugt i32* %scevgep, %b
+ %bound0 = icmp ugt ptr %scevgep64, %a
+ %bound1 = icmp ugt ptr %scevgep, %b
%found.conflict = and i1 %bound0, %bound1
%n.vec = and i32 %m, -4
%cmp.n = icmp eq i32 %n.vec, %m
@@ -384,21 +384,21 @@ for.body.us.us: ; preds = %for.body.us.us.preh
%i.039.us.us = phi i32 [ %inc15.us.us, %for.cond5.for.end13_crit_edge.us.us ], [ 0, %for.body.us.us.preheader ]
%1 = add i32 0, 0
%vla.us.us = alloca i32, i32 %call.us.us, align 4
- %vla.us.us56 = bitcast i32* %vla.us.us to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull align 4 %vla.us.us56, i8* align 4 %a57, i32 %0, i1 false)
+ %vla.us.us56 = bitcast ptr %vla.us.us to ptr
+ call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 %vla.us.us56, ptr align 4 %a57, i32 %0, i1 false)
%brmerge = select i1 %min.iters.check, i1 true, i1 %found.conflict
br i1 %brmerge, label %for.body7.us.us.preheader, label %vector.body
vector.body: ; preds = %for.body.us.us, %vector.body
%index = phi i32 [ %index.next, %vector.body ], [ 0, %for.body.us.us ]
- %2 = getelementptr inbounds i32, i32* %b, i32 %index
- %3 = bitcast i32* %2 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %3, align 4
- %4 = getelementptr inbounds i32, i32* %vla.us.us, <4 x i32> %wide.load
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %4, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %5 = getelementptr inbounds i32, i32* %a, i32 %index
- %6 = bitcast i32* %5 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %6, align 4
+ %2 = getelementptr inbounds i32, ptr %b, i32 %index
+ %3 = bitcast ptr %2 to ptr
+ %wide.load = load <4 x i32>, ptr %3, align 4
+ %4 = getelementptr inbounds i32, ptr %vla.us.us, <4 x i32> %wide.load
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %4, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %5 = getelementptr inbounds i32, ptr %a, i32 %index
+ %6 = bitcast ptr %5 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %6, align 4
%index.next = add nuw i32 %index, 4
%7 = icmp eq i32 %index.next, %n.vec
br i1 %7, label %middle.block, label %vector.body
@@ -412,12 +412,12 @@ for.body7.us.us.preheader: ; preds = %for.body.us.us, %mi
for.body7.us.us: ; preds = %for.body7.us.us.preheader, %for.body7.us.us
%j.137.us.us = phi i32 [ %inc12.us.us, %for.body7.us.us ], [ %j.137.us.us.ph, %for.body7.us.us.preheader ]
- %arrayidx8.us.us = getelementptr inbounds i32, i32* %b, i32 %j.137.us.us
- %8 = load i32, i32* %arrayidx8.us.us, align 4
- %arrayidx9.us.us = getelementptr inbounds i32, i32* %vla.us.us, i32 %8
- %9 = load i32, i32* %arrayidx9.us.us, align 4
- %arrayidx10.us.us = getelementptr inbounds i32, i32* %a, i32 %j.137.us.us
- store i32 %9, i32* %arrayidx10.us.us, align 4
+ %arrayidx8.us.us = getelementptr inbounds i32, ptr %b, i32 %j.137.us.us
+ %8 = load i32, ptr %arrayidx8.us.us, align 4
+ %arrayidx9.us.us = getelementptr inbounds i32, ptr %vla.us.us, i32 %8
+ %9 = load i32, ptr %arrayidx9.us.us, align 4
+ %arrayidx10.us.us = getelementptr inbounds i32, ptr %a, i32 %j.137.us.us
+ store i32 %9, ptr %arrayidx10.us.us, align 4
%inc12.us.us = add nuw nsw i32 %j.137.us.us, 1
%exitcond58.not = icmp eq i32 %inc12.us.us, %m
br i1 %exitcond58.not, label %for.cond5.for.end13_crit_edge.us.us, label %for.body7.us.us
@@ -437,5 +437,5 @@ for.end16: ; preds = %for.body, %for.cond
ret void
}
-declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
+declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
index a89d3522ca5f2..18c8a8a22ef22 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
@@ -16,7 +16,7 @@
-define arm_aapcs_vfpcc void @push_out_mul_gather(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) {
+define arm_aapcs_vfpcc void @push_out_mul_gather(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
; CHECK-LABEL: push_out_mul_gather:
; CHECK: @ %bb.0: @ %vector.ph
; CHECK-NEXT: adr r3, .LCPI0_0
@@ -45,11 +45,11 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
- %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %2 = getelementptr inbounds i32, i32* %dst, i32 %index
- %3 = bitcast i32* %2 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %3, align 4
+ %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %2 = getelementptr inbounds i32, ptr %dst, i32 %index
+ %3 = bitcast ptr %2 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %3, align 4
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
%4 = icmp eq i32 %index.next, %n.vec
@@ -59,7 +59,7 @@ end:
ret void;
}
-define arm_aapcs_vfpcc void @push_out_add_gather(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) {
+define arm_aapcs_vfpcc void @push_out_add_gather(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
; CHECK-LABEL: push_out_add_gather:
; CHECK: @ %bb.0: @ %vector.ph
; CHECK-NEXT: adr r3, .LCPI1_0
@@ -88,11 +88,11 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%0 = add <4 x i32> %vec.ind, <i32 6, i32 6, i32 6, i32 6>
- %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %2 = getelementptr inbounds i32, i32* %dst, i32 %index
- %3 = bitcast i32* %2 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %3, align 4
+ %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %2 = getelementptr inbounds i32, ptr %dst, i32 %index
+ %3 = bitcast ptr %2 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %3, align 4
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
%4 = icmp eq i32 %index.next, %n.vec
@@ -102,7 +102,7 @@ end:
ret void;
}
-define arm_aapcs_vfpcc void @push_out_mul_add_gather(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) {
+define arm_aapcs_vfpcc void @push_out_mul_add_gather(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
; CHECK-LABEL: push_out_mul_add_gather:
; CHECK: @ %bb.0: @ %vector.ph
; CHECK-NEXT: adr r3, .LCPI2_0
@@ -132,11 +132,11 @@ vector.body: ; preds = %vector.body, %vecto
%vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
%1 = add <4 x i32> %0, <i32 6, i32 6, i32 6, i32 6>
- %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %3 = getelementptr inbounds i32, i32* %dst, i32 %index
- %4 = bitcast i32* %3 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4
+ %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %3 = getelementptr inbounds i32, ptr %dst, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %4, align 4
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
%5 = icmp eq i32 %index.next, %n.vec
@@ -146,7 +146,7 @@ end:
ret void;
}
-define arm_aapcs_vfpcc void @push_out_mul_scatter(i32* noalias nocapture readonly %data,
+define arm_aapcs_vfpcc void @push_out_mul_scatter(ptr noalias nocapture readonly %data,
; CHECK-LABEL: push_out_mul_scatter:
; CHECK: @ %bb.0: @ %vector.ph
; CHECK-NEXT: adr r1, .LCPI3_0
@@ -166,7 +166,7 @@ define arm_aapcs_vfpcc void @push_out_mul_scatter(i32* noalias nocapture readonl
; CHECK-NEXT: .long 4294967224 @ 0xffffffb8
; CHECK-NEXT: .long 4294967248 @ 0xffffffd0
; CHECK-NEXT: .long 4294967272 @ 0xffffffe8
- i32* noalias nocapture %dst, i32 %n.vec,
+ ptr noalias nocapture %dst, i32 %n.vec,
<4 x i32> %to.store) {
vector.ph: ; preds = %for.body.preheader
@@ -176,8 +176,8 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
- %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %to.store, <4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %to.store, <4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
%2 = icmp eq i32 %index.next, %n.vec
@@ -187,7 +187,7 @@ end:
ret void;
}
-define arm_aapcs_vfpcc void @push_out_add_scatter(i32* noalias nocapture readonly %data,
+define arm_aapcs_vfpcc void @push_out_add_scatter(ptr noalias nocapture readonly %data,
; CHECK-LABEL: push_out_add_scatter:
; CHECK: @ %bb.0: @ %vector.ph
; CHECK-NEXT: adr r1, .LCPI4_0
@@ -207,7 +207,7 @@ define arm_aapcs_vfpcc void @push_out_add_scatter(i32* noalias nocapture readonl
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 8 @ 0x8
; CHECK-NEXT: .long 16 @ 0x10
- i32* noalias nocapture %dst, i32 %n.vec,
+ ptr noalias nocapture %dst, i32 %n.vec,
<4 x i32> %to.store) {
vector.ph: ; preds = %for.body.preheader
@@ -217,8 +217,8 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%0 = add <4 x i32> %vec.ind, <i32 6, i32 6, i32 6, i32 6>
- %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %to.store, <4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %to.store, <4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
%2 = icmp eq i32 %index.next, %n.vec
@@ -228,7 +228,7 @@ end:
ret void;
}
-define arm_aapcs_vfpcc void @push_out_mul_gather_scatter(i32* noalias nocapture readonly %data,
+define arm_aapcs_vfpcc void @push_out_mul_gather_scatter(ptr noalias nocapture readonly %data,
; CHECK-LABEL: push_out_mul_gather_scatter:
; CHECK: @ %bb.0: @ %vector.ph
; CHECK-NEXT: adr r1, .LCPI5_0
@@ -251,7 +251,7 @@ define arm_aapcs_vfpcc void @push_out_mul_gather_scatter(i32* noalias nocapture
; CHECK-NEXT: .long 6 @ 0x6
; CHECK-NEXT: .long 12 @ 0xc
; CHECK-NEXT: .long 18 @ 0x12
- i32* noalias nocapture %dst, i32 %n.vec) {
+ ptr noalias nocapture %dst, i32 %n.vec) {
vector.ph: ; preds = %for.body.preheader
br label %vector.body
@@ -260,9 +260,9 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
- %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %wide.masked.gather, <4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %wide.masked.gather, <4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
%2 = icmp eq i32 %index.next, %n.vec
@@ -272,7 +272,7 @@ end:
ret void;
}
-define arm_aapcs_vfpcc void @push_out_add_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) {
+define arm_aapcs_vfpcc void @push_out_add_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
; CHECK-LABEL: push_out_add_sub_block:
; CHECK: @ %bb.0: @ %vector.ph
; CHECK-NEXT: adr r3, .LCPI6_0
@@ -304,11 +304,11 @@ vector.body: ; preds = %vector.body, %vecto
lower.block: ; preds = %vector.body
%0 = add <4 x i32> %vec.ind, <i32 6, i32 6, i32 6, i32 6>
- %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %2 = getelementptr inbounds i32, i32* %dst, i32 %index
- %3 = bitcast i32* %2 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %3, align 4
+ %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %2 = getelementptr inbounds i32, ptr %dst, i32 %index
+ %3 = bitcast ptr %2 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %3, align 4
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
br label %vector.body.end
@@ -321,7 +321,7 @@ end:
ret void;
}
-define arm_aapcs_vfpcc void @non_gatscat_use1(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec, <4 x i32>* %x) {
+define arm_aapcs_vfpcc void @non_gatscat_use1(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec, ptr %x) {
; CHECK-LABEL: non_gatscat_use1:
; CHECK: @ %bb.0: @ %vector.ph
; CHECK-NEXT: .save {r4, lr}
@@ -365,13 +365,13 @@ vector.body: ; preds = %vector.body, %vecto
%vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
%1 = add <4 x i32> %0, <i32 6, i32 6, i32 6, i32 6>
- %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %3 = getelementptr inbounds i32, i32* %dst, i32 %index
- %4 = bitcast i32* %3 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4
+ %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %3 = getelementptr inbounds i32, ptr %dst, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %4, align 4
%non_gatscat_use = mul <4 x i32> %0, <i32 3, i32 3, i32 3, i32 3>
- store <4 x i32> %non_gatscat_use, <4 x i32>* %x, align 4
+ store <4 x i32> %non_gatscat_use, ptr %x, align 4
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
%5 = icmp eq i32 %index.next, %n.vec
@@ -381,7 +381,7 @@ end:
ret void;
}
-define arm_aapcs_vfpcc void @non_gatscat_use2(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec, <4 x i32>* %x) {
+define arm_aapcs_vfpcc void @non_gatscat_use2(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec, ptr %x) {
; CHECK-LABEL: non_gatscat_use2:
; CHECK: @ %bb.0: @ %vector.ph
; CHECK-NEXT: .save {r4, r5, r7, lr}
@@ -428,13 +428,13 @@ vector.body: ; preds = %vector.body, %vecto
%vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
%1 = add <4 x i32> %0, <i32 6, i32 6, i32 6, i32 6>
- %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- %3 = getelementptr inbounds i32, i32* %dst, i32 %index
- %4 = bitcast i32* %3 to <4 x i32>*
- store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4
+ %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %3 = getelementptr inbounds i32, ptr %dst, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ store <4 x i32> %wide.masked.gather, ptr %4, align 4
%non_gatscat_use = mul <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
- store <4 x i32> %non_gatscat_use, <4 x i32>* %x, align 4
+ store <4 x i32> %non_gatscat_use, ptr %x, align 4
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8>
%5 = icmp eq i32 %index.next, %n.vec
@@ -444,7 +444,7 @@ end:
ret void;
}
-define dso_local void @arm_mat_mult_q31(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %n, i32 %m, i32 %l) local_unnamed_addr #0 {
+define dso_local void @arm_mat_mult_q31(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n, i32 %m, i32 %l) local_unnamed_addr #0 {
; CHECK-LABEL: arm_mat_mult_q31:
; CHECK: @ %bb.0: @ %for.cond8.preheader.us.us.preheader.preheader
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
@@ -568,12 +568,12 @@ vector.body: ; preds = %vector.body, %vecto
%vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %9, %vector.body ]
%3 = add <4 x i32> %vec.ind, %broadcast.splat
- %4 = getelementptr inbounds i32, i32* %A, <4 x i32> %3
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %4, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), !tbaa !3
+ %4 = getelementptr inbounds i32, ptr %A, <4 x i32> %3
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %4, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), !tbaa !3
%5 = mul <4 x i32> %vec.ind, %broadcast.splat87
%6 = add <4 x i32> %5, %broadcast.splat89
- %7 = getelementptr inbounds i32, i32* %B, <4 x i32> %6
- %wide.masked.gather90 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %7, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), !tbaa !3
+ %7 = getelementptr inbounds i32, ptr %B, <4 x i32> %6
+ %wide.masked.gather90 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %7, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), !tbaa !3
%8 = mul nsw <4 x i32> %wide.masked.gather90, %wide.masked.gather
%9 = add <4 x i32> %8, %vec.phi
%index.next = add i32 %index, 4
@@ -585,8 +585,8 @@ middle.block: ; preds = %vector.body
%11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %9)
;for.cond8.for.cond.cleanup10_crit_edge.us.us: ; preds = %for.body11.us.us, %middle.block
%add19.us.us = add i32 %j.051.us.us, %mul18.us
- %arrayidx20.us.us = getelementptr inbounds i32, i32* %C, i32 %add19.us.us
- store i32 %11, i32* %arrayidx20.us.us, align 4, !tbaa !3
+ %arrayidx20.us.us = getelementptr inbounds i32, ptr %C, i32 %add19.us.us
+ store i32 %11, ptr %arrayidx20.us.us, align 4, !tbaa !3
%inc.us.us = add nuw nsw i32 %j.051.us.us, 1
%exitcond = icmp eq i32 %inc.us.us, %m
br i1 %exitcond, label %for.cond4.for.cond.cleanup6_crit_edge.us, label %vector.ph
@@ -595,7 +595,7 @@ for.end25: ; preds = %for.cond4.for.cond.
ret void
}
-define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16* noalias nocapture readonly %B, i16* noalias nocapture %C, i32 %n, i32 %m, i32 %l) local_unnamed_addr #0 {
+define dso_local void @arm_mat_mult_q15(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n, i32 %m, i32 %l) local_unnamed_addr #0 {
; CHECK-LABEL: arm_mat_mult_q15:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -772,9 +772,9 @@ for.cond1.preheader.us: ; preds = %for.cond1.for.cond.
br i1 %cmp642, label %for.cond5.preheader.us73.preheader, label %for.cond5.preheader.us.us
for.cond5.preheader.us73.preheader: ; preds = %for.cond1.preheader.us
- %scevgep = getelementptr i16, i16* %C, i32 %1
- %scevgep82 = bitcast i16* %scevgep to i8*
- call void @llvm.memset.p0i8.i32(i8* align 2 %scevgep82, i8 0, i32 %0, i1 false)
+ %scevgep = getelementptr i16, ptr %C, i32 %1
+ %scevgep82 = bitcast ptr %scevgep to ptr
+ call void @llvm.memset.p0.i32(ptr align 2 %scevgep82, i8 0, i32 %0, i1 false)
br label %for.cond1.for.cond.cleanup3_crit_edge.us
for.cond1.for.cond.cleanup3_crit_edge.us: ; preds = %for.cond5.for.cond.cleanup7_crit_edge.us.us, %for.cond5.preheader.us73.preheader
@@ -801,14 +801,14 @@ vector.body: ; preds = %vector.body, %vecto
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %11, %vector.body ]
%2 = add i32 %index, %mul.us
- %3 = getelementptr inbounds i16, i16* %A, i32 %2
- %4 = bitcast i16* %3 to <4 x i16>*
- %wide.load = load <4 x i16>, <4 x i16>* %4, align 2, !tbaa !3
+ %3 = getelementptr inbounds i16, ptr %A, i32 %2
+ %4 = bitcast ptr %3 to ptr
+ %wide.load = load <4 x i16>, ptr %4, align 2, !tbaa !3
%5 = sext <4 x i16> %wide.load to <4 x i32>
%6 = mul <4 x i32> %vec.ind, %broadcast.splat
%7 = add <4 x i32> %6, %broadcast.splat86
- %8 = getelementptr inbounds i16, i16* %B, <4 x i32> %7
- %wide.masked.gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %8, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef), !tbaa !3
+ %8 = getelementptr inbounds i16, ptr %B, <4 x i32> %7
+ %wide.masked.gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %8, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef), !tbaa !3
%9 = sext <4 x i16> %wide.masked.gather to <4 x i32>
%10 = mul nsw <4 x i32> %9, %5
%11 = add <4 x i32> %10, %vec.phi
@@ -825,8 +825,8 @@ for.cond5.for.cond.cleanup7_crit_edge.us.us: ; preds = %for.body8.us.us, %m
%add14.us.us.lcssa = phi i32 [ %13, %middle.block ], [ %add14.us.us, %for.body8.us.us ]
%conv15.us.us = trunc i32 %add14.us.us.lcssa to i16
%add17.us.us = add i32 %j.046.us.us, %1
- %arrayidx18.us.us = getelementptr inbounds i16, i16* %C, i32 %add17.us.us
- store i16 %conv15.us.us, i16* %arrayidx18.us.us, align 2, !tbaa !3
+ %arrayidx18.us.us = getelementptr inbounds i16, ptr %C, i32 %add17.us.us
+ store i16 %conv15.us.us, ptr %arrayidx18.us.us, align 2, !tbaa !3
%inc20.us.us = add nuw nsw i32 %j.046.us.us, 1
%exitcond83 = icmp eq i32 %inc20.us.us, %m
br i1 %exitcond83, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.cond5.preheader.us.us
@@ -835,13 +835,13 @@ for.body8.us.us: ; preds = %for.body8.us.us.pre
%k.044.us.us = phi i32 [ %inc.us.us, %for.body8.us.us ], [ %k.044.us.us.ph, %for.body8.us.us.preheader ]
%sum.043.us.us = phi i32 [ %add14.us.us, %for.body8.us.us ], [ %sum.043.us.us.ph, %for.body8.us.us.preheader ]
%add.us.us = add i32 %k.044.us.us, %mul.us
- %arrayidx.us.us = getelementptr inbounds i16, i16* %A, i32 %add.us.us
- %14 = load i16, i16* %arrayidx.us.us, align 2, !tbaa !3
+ %arrayidx.us.us = getelementptr inbounds i16, ptr %A, i32 %add.us.us
+ %14 = load i16, ptr %arrayidx.us.us, align 2, !tbaa !3
%conv.us.us = sext i16 %14 to i32
%mul9.us.us = mul i32 %k.044.us.us, %m
%add10.us.us = add i32 %mul9.us.us, %j.046.us.us
- %arrayidx11.us.us = getelementptr inbounds i16, i16* %B, i32 %add10.us.us
- %15 = load i16, i16* %arrayidx11.us.us, align 2, !tbaa !3
+ %arrayidx11.us.us = getelementptr inbounds i16, ptr %B, i32 %add10.us.us
+ %15 = load i16, ptr %arrayidx11.us.us, align 2, !tbaa !3
%conv12.us.us = sext i16 %15 to i32
%mul13.us.us = mul nsw i32 %conv12.us.us, %conv.us.us
%add14.us.us = add nsw i32 %mul13.us.us, %sum.043.us.us
@@ -853,7 +853,7 @@ for.cond.cleanup: ; preds = %for.cond1.for.cond.
ret void
}
-define hidden arm_aapcs_vfpcc i32 @arm_depthwise_conv_s8(i8* nocapture readonly %input, i16 zeroext %input_x, i16 zeroext %input_y, i16 zeroext %input_ch, i8* nocapture readonly %kernel, i16 zeroext %output_ch, i16 zeroext %ch_mult, i16 zeroext %kernel_x, i16 zeroext %kernel_y, i16 zeroext %pad_x, i16 zeroext %pad_y, i16 zeroext %stride_x, i16 zeroext %stride_y, i32* nocapture readonly %bias, i8* nocapture %output, i32* nocapture readonly %output_shift, i32* nocapture readonly %output_mult, i16 zeroext %output_x, i16 zeroext %output_y, i32 %output_offset, i32 %input_offset, i32 %output_activation_min, i32 %output_activation_max, i16 zeroext %dilation_x, i16 zeroext %dilation_y, i16* nocapture readnone %buffer_a) local_unnamed_addr #0 {
+define hidden arm_aapcs_vfpcc i32 @arm_depthwise_conv_s8(ptr nocapture readonly %input, i16 zeroext %input_x, i16 zeroext %input_y, i16 zeroext %input_ch, ptr nocapture readonly %kernel, i16 zeroext %output_ch, i16 zeroext %ch_mult, i16 zeroext %kernel_x, i16 zeroext %kernel_y, i16 zeroext %pad_x, i16 zeroext %pad_y, i16 zeroext %stride_x, i16 zeroext %stride_y, ptr nocapture readonly %bias, ptr nocapture %output, ptr nocapture readonly %output_shift, ptr nocapture readonly %output_mult, i16 zeroext %output_x, i16 zeroext %output_y, i32 %output_offset, i32 %input_offset, i32 %output_activation_min, i32 %output_activation_max, i16 zeroext %dilation_x, i16 zeroext %dilation_y, ptr nocapture readnone %buffer_a) local_unnamed_addr #0 {
; CHECK-LABEL: arm_depthwise_conv_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -1074,12 +1074,12 @@ vector.body: ; preds = %vector.body, %vecto
%tmp79 = add nsw <4 x i32> %vec.ind, %broadcast.splat68
%tmp80 = mul nsw <4 x i32> %broadcast.splat70, %tmp79
%tmp81 = add nsw <4 x i32> %tmp80, %broadcast.splat72
- %tmp82 = getelementptr inbounds i8, i8* %input, <4 x i32> %tmp78
- %wide.masked.gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %tmp82, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+ %tmp82 = getelementptr inbounds i8, ptr %input, <4 x i32> %tmp78
+ %wide.masked.gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %tmp82, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
%tmp83 = sext <4 x i8> %wide.masked.gather to <4 x i32>
%tmp84 = add nsw <4 x i32> %broadcast.splat74, %tmp83
- %tmp85 = getelementptr inbounds i8, i8* %kernel, <4 x i32> %tmp81
- %wide.masked.gather75 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %tmp85, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+ %tmp85 = getelementptr inbounds i8, ptr %kernel, <4 x i32> %tmp81
+ %wide.masked.gather75 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %tmp85, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
%tmp86 = sext <4 x i8> %wide.masked.gather75 to <4 x i32>
%tmp87 = mul nsw <4 x i32> %tmp84, %tmp86
%tmp88 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp87)
@@ -1104,11 +1104,11 @@ if.end: ; preds = %for.cond.cleanup9.i
ret i32 0
}
-declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
-declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
-declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>) #3
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
+declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
+declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x i8>) #3
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
-declare void @llvm.memset.p0i8.i32(i8* align 2, i8, i32, i1)
+declare void @llvm.memset.p0.i32(ptr align 2, i8, i32, i1)
-declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll
index 66c41bb32dee6..4b84529414809 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -enable-mem-access-versioning=false -tail-predication=force-enabled %s -o - | FileCheck %s
-define dso_local void @mve_gather_qi_wb(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %n, i32 %m, i32 %l) {
+define dso_local void @mve_gather_qi_wb(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n, i32 %m, i32 %l) {
; CHECK-LABEL: mve_gather_qi_wb:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
@@ -33,21 +33,21 @@ define dso_local void @mve_gather_qi_wb(i32* noalias nocapture readonly %A, i32*
; CHECK-NEXT: .long 4294967288 @ 0xfffffff8
entry: ; preds = %middle.
%add.us.us = add i32 4, %n
- %arrayidx.us.us = getelementptr inbounds i32, i32* %C, i32 %add.us.us
+ %arrayidx.us.us = getelementptr inbounds i32, ptr %C, i32 %add.us.us
br label %vector.body
vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %7, %vector.body ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %entry ], [ %vec.ind.next, %vector.body ]
%0 = add i32 %index, %n
- %1 = getelementptr inbounds i32, i32* %A, i32 %0
+ %1 = getelementptr inbounds i32, ptr %A, i32 %0
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
- %2 = bitcast i32* %1 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %2 = bitcast ptr %1 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%3 = mul <4 x i32> %vec.ind, <i32 5, i32 5, i32 5, i32 5>
%4 = add <4 x i32> %3, <i32 3, i32 3, i32 3, i32 3>
- %5 = getelementptr inbounds i32, i32* %B, <4 x i32> %4
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %5, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %5 = getelementptr inbounds i32, ptr %B, <4 x i32> %4
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %5, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%6 = mul nsw <4 x i32> %wide.masked.gather, %wide.masked.load
%7 = add <4 x i32> %vec.phi, %6
%index.next = add i32 %index, 4
@@ -57,7 +57,7 @@ vector.body: ; preds = %vector.body, %entry
middle.block: ; preds = %vector.body
%9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi
%10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
- store i32 %10, i32* %arrayidx.us.us, align 4
+ store i32 %10, ptr %arrayidx.us.us, align 4
%inc21.us.us = add nuw i32 4, 1
%exitcond81.not = icmp eq i32 %inc21.us.us, %n
br label %end
@@ -65,7 +65,7 @@ end: ; preds = %middle.block
ret void
}
-define dso_local void @mve_gatherscatter_offset(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %n, i32 %m, i32 %l) {
+define dso_local void @mve_gatherscatter_offset(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n, i32 %m, i32 %l) {
; CHECK-LABEL: mve_gatherscatter_offset:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
@@ -102,23 +102,23 @@ define dso_local void @mve_gatherscatter_offset(i32* noalias nocapture readonly
; CHECK-NEXT: .long 18 @ 0x12
entry: ; preds = %middle.
%add.us.us = add i32 4, %n
- %arrayidx.us.us = getelementptr inbounds i32, i32* %C, i32 %add.us.us
+ %arrayidx.us.us = getelementptr inbounds i32, ptr %C, i32 %add.us.us
br label %vector.body
vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %7, %vector.body ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %entry ], [ %vec.ind.next, %vector.body ]
%0 = add i32 %index, %n
- %1 = getelementptr inbounds i32, i32* %A, i32 %0
+ %1 = getelementptr inbounds i32, ptr %A, i32 %0
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
- %2 = bitcast i32* %1 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %2 = bitcast ptr %1 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%3 = mul <4 x i32> %vec.ind, <i32 5, i32 5, i32 5, i32 5>
%4 = add <4 x i32> %3, <i32 3, i32 3, i32 3, i32 3>
- %5 = getelementptr inbounds i32, i32* %B, <4 x i32> %4
- %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %5, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %5 = getelementptr inbounds i32, ptr %B, <4 x i32> %4
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %5, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%6 = mul nsw <4 x i32> %wide.masked.gather, %wide.masked.load
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %6, <4 x i32*> %5, i32 4, <4 x i1> %active.lane.mask)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %6, <4 x ptr> %5, i32 4, <4 x i1> %active.lane.mask)
%7 = add <4 x i32> %vec.phi, %6
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
@@ -127,7 +127,7 @@ vector.body: ; preds = %vector.body, %entry
middle.block: ; preds = %vector.body
%9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi
%10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
- store i32 %10, i32* %arrayidx.us.us, align 4
+ store i32 %10, ptr %arrayidx.us.us, align 4
%inc21.us.us = add nuw i32 4, 1
%exitcond81.not = icmp eq i32 %inc21.us.us, %n
br label %end
@@ -135,7 +135,7 @@ end: ; preds = %middle.block
ret void
}
-define dso_local void @mve_scatter_qi(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %n, i32 %m, i32 %l) {
+define dso_local void @mve_scatter_qi(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n, i32 %m, i32 %l) {
; CHECK-LABEL: mve_scatter_qi:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
@@ -172,22 +172,22 @@ define dso_local void @mve_scatter_qi(i32* noalias nocapture readonly %A, i32* n
; CHECK-NEXT: .long 4294967288 @ 0xfffffff8
entry: ; preds = %middle.
%add.us.us = add i32 4, %n
- %arrayidx.us.us = getelementptr inbounds i32, i32* %C, i32 %add.us.us
+ %arrayidx.us.us = getelementptr inbounds i32, ptr %C, i32 %add.us.us
br label %vector.body
vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %7, %vector.body ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %entry ], [ %vec.ind.next, %vector.body ]
%0 = add i32 %index, %n
- %1 = getelementptr inbounds i32, i32* %A, i32 %0
+ %1 = getelementptr inbounds i32, ptr %A, i32 %0
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
- %2 = bitcast i32* %1 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %2 = bitcast ptr %1 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%3 = mul <4 x i32> %vec.ind, <i32 5, i32 5, i32 5, i32 5>
%4 = add <4 x i32> %3, <i32 3, i32 3, i32 3, i32 3>
- %5 = getelementptr inbounds i32, i32* %B, <4 x i32> %4
+ %5 = getelementptr inbounds i32, ptr %B, <4 x i32> %4
%6 = mul nsw <4 x i32> <i32 3, i32 3, i32 3, i32 3>, %wide.masked.load
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %6, <4 x i32*> %5, i32 4, <4 x i1> %active.lane.mask)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %6, <4 x ptr> %5, i32 4, <4 x i1> %active.lane.mask)
%7 = add <4 x i32> %vec.phi, %6
%index.next = add i32 %index, 4
%vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
@@ -196,7 +196,7 @@ vector.body: ; preds = %vector.body, %entry
middle.block: ; preds = %vector.body
%9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi
%10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
- store i32 %10, i32* %arrayidx.us.us, align 4
+ store i32 %10, ptr %arrayidx.us.us, align 4
%inc21.us.us = add nuw i32 4, 1
%exitcond81.not = icmp eq i32 %inc21.us.us, %n
br label %end
@@ -204,7 +204,7 @@ end: ; preds = %middle.block
ret void
}
-define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocapture %w, i32 %N) {
+define void @justoffsets(ptr noalias nocapture readonly %r, ptr noalias nocapture %w, i32 %N) {
; CHECK-LABEL: justoffsets:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -293,17 +293,17 @@ vector.ph: ; preds = %vector.memcheck
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %pointer.phi = phi i8* [ %r, %vector.ph ], [ %ptr.ind, %vector.body ]
- %pointer.phi55 = phi i8* [ %w, %vector.ph ], [ %ptr.ind56, %vector.body ]
+ %pointer.phi = phi ptr [ %r, %vector.ph ], [ %ptr.ind, %vector.body ]
+ %pointer.phi55 = phi ptr [ %w, %vector.ph ], [ %ptr.ind56, %vector.body ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %l1 = getelementptr i8, i8* %pointer.phi, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
- %l2 = getelementptr i8, i8* %pointer.phi55, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
- %l3 = getelementptr inbounds i8, <4 x i8*> %l1, i32 1
+ %l1 = getelementptr i8, ptr %pointer.phi, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %l2 = getelementptr i8, ptr %pointer.phi55, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %l3 = getelementptr inbounds i8, <4 x ptr> %l1, i32 1
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
- %wide.masked.gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %l1, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
- %l4 = getelementptr inbounds i8, <4 x i8*> %l1, i32 2
- %wide.masked.gather57 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %l3, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
- %wide.masked.gather58 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %l4, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
+ %wide.masked.gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %l1, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
+ %l4 = getelementptr inbounds i8, <4 x ptr> %l1, i32 2
+ %wide.masked.gather57 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %l3, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
+ %wide.masked.gather58 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %l4, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
%l5 = zext <4 x i8> %wide.masked.gather to <4 x i32>
%l6 = mul nuw nsw <4 x i32> %l5, <i32 19595, i32 19595, i32 19595, i32 19595>
%l7 = zext <4 x i8> %wide.masked.gather57 to <4 x i32>
@@ -331,15 +331,15 @@ vector.body: ; preds = %vector.body, %vecto
%l29 = add nsw <4 x i32> %l28, %l26
%l30 = lshr <4 x i32> %l29, <i32 16, i32 16, i32 16, i32 16>
%l31 = trunc <4 x i32> %l30 to <4 x i8>
- %l32 = getelementptr inbounds i8, <4 x i8*> %l2, i32 1
- call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %l15, <4 x i8*> %l2, i32 1, <4 x i1> %active.lane.mask)
- %l33 = getelementptr inbounds i8, <4 x i8*> %l2, i32 2
- call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %l23, <4 x i8*> %l32, i32 1, <4 x i1> %active.lane.mask)
- call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %l31, <4 x i8*> %l33, i32 1, <4 x i1> %active.lane.mask)
+ %l32 = getelementptr inbounds i8, <4 x ptr> %l2, i32 1
+ call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %l15, <4 x ptr> %l2, i32 1, <4 x i1> %active.lane.mask)
+ %l33 = getelementptr inbounds i8, <4 x ptr> %l2, i32 2
+ call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %l23, <4 x ptr> %l32, i32 1, <4 x i1> %active.lane.mask)
+ call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %l31, <4 x ptr> %l33, i32 1, <4 x i1> %active.lane.mask)
%index.next = add i32 %index, 4
%l34 = icmp eq i32 %index.next, %n.vec
- %ptr.ind = getelementptr i8, i8* %pointer.phi, i32 12
- %ptr.ind56 = getelementptr i8, i8* %pointer.phi55, i32 12
+ %ptr.ind = getelementptr i8, ptr %pointer.phi, i32 12
+ %ptr.ind56 = getelementptr i8, ptr %pointer.phi55, i32 12
br i1 %l34, label %for.cond.cleanup, label %vector.body
for.cond.cleanup: ; preds = %vector.body, %for.body, %entry
@@ -347,9 +347,9 @@ for.cond.cleanup: ; preds = %vector.body, %for.b
}
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
-declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
-declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
+declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
-declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
+declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-phireg.ll b/llvm/test/CodeGen/Thumb2/mve-phireg.ll
index d2f79fcd5fd98..dad856c0677a1 100644
--- a/llvm/test/CodeGen/Thumb2/mve-phireg.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-phireg.ll
@@ -92,8 +92,8 @@ vector.body: ; preds = %vector.body, %entry
%0 = and <8 x i32> %vec.ind, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%1 = icmp eq <8 x i32> %0, zeroinitializer
%2 = select <8 x i1> %1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
- %3 = bitcast i16* undef to <8 x i16>*
- store <8 x i16> %2, <8 x i16>* %3, align 2
+ %3 = bitcast ptr undef to ptr
+ store <8 x i16> %2, ptr %3, align 2
%4 = icmp eq i32 undef, 128
br i1 %4, label %for.cond4.preheader, label %vector.body
@@ -101,7 +101,7 @@ for.cond4.preheader: ; preds = %vector.body
br i1 undef, label %vector.body105, label %for.body10
for.cond4.loopexit: ; preds = %for.body10
- %call5 = call arm_aapcs_vfpcc i32 bitcast (i32 (...)* @l to i32 ()*)()
+ %call5 = call arm_aapcs_vfpcc i32 @l()
br label %vector.body105
for.body10: ; preds = %for.body10, %for.cond4.preheader
@@ -128,7 +128,7 @@ vector.body115: ; preds = %vector.body115, %ve
@a = external dso_local global i32, align 4
-@b = dso_local local_unnamed_addr global i32 ptrtoint (i32* @a to i32), align 4
+@b = dso_local local_unnamed_addr global i32 ptrtoint (ptr @a to i32), align 4
@c = dso_local global i32 2, align 4
@d = dso_local global i32 2, align 4
@@ -211,43 +211,43 @@ define dso_local i32 @e() #0 {
; CHECK-NEXT: .long 0x00000000 @ float 0
entry:
%f = alloca i16, align 2
- %g = alloca [3 x [8 x [4 x i16*]]], align 4
- store i16 4, i16* %f, align 2
- %0 = load i32, i32* @c, align 4
- %1 = load i32, i32* @d, align 4
- %arrayinit.element7 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 1, i32 1
- %2 = bitcast i16** %arrayinit.element7 to i32*
- store i32 %0, i32* %2, align 4
- %arrayinit.element8 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 1, i32 2
- store i16* null, i16** %arrayinit.element8, align 4
- %3 = bitcast i16** undef to i32*
- store i32 %1, i32* %3, align 4
- %4 = bitcast i16** undef to i32*
- store i32 %0, i32* %4, align 4
- %arrayinit.element13 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 2, i32 2
- %5 = bitcast i16** %arrayinit.element13 to <4 x i16*>*
- store <4 x i16*> <i16* inttoptr (i32 4 to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*), i16* null>, <4 x i16*>* %5, align 4
- %arrayinit.element24 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 4, i32 2
- %6 = bitcast i16** %arrayinit.element24 to <4 x i16*>*
- store <4 x i16*> <i16* bitcast (i32* @d to i16*), i16* null, i16* bitcast (i32* @d to i16*), i16* bitcast (i32 ()* @e to i16*)>, <4 x i16*>* %6, align 4
- %7 = bitcast i16** undef to <4 x i16*>*
- store <4 x i16*> <i16* inttoptr (i32 4 to i16*), i16* bitcast (i32 ()* @e to i16*), i16* bitcast (i32* @c to i16*), i16* null>, <4 x i16*>* %7, align 4
- %8 = bitcast i16** undef to <4 x i16*>*
- store <4 x i16*> <i16* bitcast (i32* @c to i16*), i16* bitcast (i32 ()* @e to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*)>, <4 x i16*>* %8, align 4
- %9 = bitcast i16** undef to <4 x i16*>*
- store <4 x i16*> <i16* bitcast (i32 ()* @e to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*)>, <4 x i16*>* %9, align 4
- %10 = bitcast i16** undef to <4 x i16*>*
- store <4 x i16*> <i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*), i16* null, i16* bitcast (i32 ()* @e to i16*)>, <4 x i16*>* %10, align 4
- call void @llvm.memset.p0i8.i32(i8* nonnull align 4 dereferenceable(64) undef, i8 0, i32 64, i1 false)
- %11 = bitcast i16** undef to <4 x i16*>*
- store <4 x i16*> <i16* bitcast (i32* @d to i16*), i16* bitcast (i32 ()* @e to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @d to i16*)>, <4 x i16*>* %11, align 4
- %12 = bitcast i16** undef to <4 x i16*>*
- store <4 x i16*> <i16* null, i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*)>, <4 x i16*>* %12, align 4
- %13 = bitcast i16** undef to <4 x i16*>*
- store <4 x i16*> <i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @d to i16*), i16* bitcast (i32* @c to i16*), i16* null>, <4 x i16*>* %13, align 4
- %arrayinit.begin78 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 2, i32 3, i32 0
- store i16* inttoptr (i32 4 to i16*), i16** %arrayinit.begin78, align 4
- store i32 0, i32* @b, align 4
+ %g = alloca [3 x [8 x [4 x ptr]]], align 4
+ store i16 4, ptr %f, align 2
+ %0 = load i32, ptr @c, align 4
+ %1 = load i32, ptr @d, align 4
+ %arrayinit.element7 = getelementptr inbounds [3 x [8 x [4 x ptr]]], ptr %g, i32 0, i32 0, i32 1, i32 1
+ %2 = bitcast ptr %arrayinit.element7 to ptr
+ store i32 %0, ptr %2, align 4
+ %arrayinit.element8 = getelementptr inbounds [3 x [8 x [4 x ptr]]], ptr %g, i32 0, i32 0, i32 1, i32 2
+ store ptr null, ptr %arrayinit.element8, align 4
+ %3 = bitcast ptr undef to ptr
+ store i32 %1, ptr %3, align 4
+ %4 = bitcast ptr undef to ptr
+ store i32 %0, ptr %4, align 4
+ %arrayinit.element13 = getelementptr inbounds [3 x [8 x [4 x ptr]]], ptr %g, i32 0, i32 0, i32 2, i32 2
+ %5 = bitcast ptr %arrayinit.element13 to ptr
+ store <4 x ptr> <ptr inttoptr (i32 4 to ptr), ptr @c, ptr @c, ptr null>, ptr %5, align 4
+ %arrayinit.element24 = getelementptr inbounds [3 x [8 x [4 x ptr]]], ptr %g, i32 0, i32 0, i32 4, i32 2
+ %6 = bitcast ptr %arrayinit.element24 to ptr
+ store <4 x ptr> <ptr @d, ptr null, ptr @d, ptr @e>, ptr %6, align 4
+ %7 = bitcast ptr undef to ptr
+ store <4 x ptr> <ptr inttoptr (i32 4 to ptr), ptr @e, ptr @c, ptr null>, ptr %7, align 4
+ %8 = bitcast ptr undef to ptr
+ store <4 x ptr> <ptr @c, ptr @e, ptr @c, ptr @c>, ptr %8, align 4
+ %9 = bitcast ptr undef to ptr
+ store <4 x ptr> <ptr @e, ptr @c, ptr @c, ptr @c>, ptr %9, align 4
+ %10 = bitcast ptr undef to ptr
+ store <4 x ptr> <ptr @c, ptr @c, ptr null, ptr @e>, ptr %10, align 4
+ call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(64) undef, i8 0, i32 64, i1 false)
+ %11 = bitcast ptr undef to ptr
+ store <4 x ptr> <ptr @d, ptr @e, ptr @c, ptr @d>, ptr %11, align 4
+ %12 = bitcast ptr undef to ptr
+ store <4 x ptr> <ptr null, ptr @c, ptr @c, ptr @c>, ptr %12, align 4
+ %13 = bitcast ptr undef to ptr
+ store <4 x ptr> <ptr @c, ptr @d, ptr @c, ptr null>, ptr %13, align 4
+ %arrayinit.begin78 = getelementptr inbounds [3 x [8 x [4 x ptr]]], ptr %g, i32 0, i32 2, i32 3, i32 0
+ store ptr inttoptr (i32 4 to ptr), ptr %arrayinit.begin78, align 4
+ store i32 0, ptr @b, align 4
br label %for.cond
for.cond: ; preds = %for.cond, %entry
@@ -255,10 +255,10 @@ for.cond: ; preds = %for.cond, %entry
}
; Function Attrs: argmemonly nounwind willreturn
-declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg) #1
+declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg) #1
; Function Attrs: argmemonly nounwind willreturn
-declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
declare arm_aapcs_vfpcc i32 @l(...)
diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll
index 39dededb5973a..dd63b8564bdb4 100644
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll
@@ -3,7 +3,7 @@
; Check some loop postinc's for properly distributed post-incs
-define i32 @vaddv(i32* nocapture readonly %data, i32 %N) {
+define i32 @vaddv(ptr nocapture readonly %data, i32 %N) {
; CHECK-LABEL: vaddv:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -35,25 +35,25 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret i32 %x.0.lcssa
for.body: ; preds = %entry, %for.body
- %data.addr.014 = phi i32* [ %add.ptr1, %for.body ], [ %data, %entry ]
+ %data.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %data, %entry ]
%i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%x.012 = phi i32 [ %7, %for.body ], [ 0, %entry ]
- %0 = bitcast i32* %data.addr.014 to <4 x i32>*
- %1 = load <4 x i32>, <4 x i32>* %0, align 4
+ %0 = bitcast ptr %data.addr.014 to ptr
+ %1 = load <4 x i32>, ptr %0, align 4
%2 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %1, i32 0)
%3 = add i32 %2, %x.012
- %add.ptr = getelementptr inbounds i32, i32* %data.addr.014, i32 4
- %4 = bitcast i32* %add.ptr to <4 x i32>*
- %5 = load <4 x i32>, <4 x i32>* %4, align 4
+ %add.ptr = getelementptr inbounds i32, ptr %data.addr.014, i32 4
+ %4 = bitcast ptr %add.ptr to ptr
+ %5 = load <4 x i32>, ptr %4, align 4
%6 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %5, i32 0)
%7 = add i32 %3, %6
- %add.ptr1 = getelementptr inbounds i32, i32* %data.addr.014, i32 8
+ %add.ptr1 = getelementptr inbounds i32, ptr %data.addr.014, i32 8
%inc = add nuw nsw i32 %i.013, 1
%exitcond = icmp eq i32 %inc, %N
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
-define void @arm_cmplx_dot_prod_q15(i16* nocapture readonly %pSrcA, i16* nocapture readonly %pSrcB, i32 %numSamples, i32* nocapture %realResult, i32* nocapture %imagResult) {
+define void @arm_cmplx_dot_prod_q15(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, i32 %numSamples, ptr nocapture %realResult, ptr nocapture %imagResult) {
; CHECK-LABEL: arm_cmplx_dot_prod_q15:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -129,42 +129,42 @@ entry:
%mul = shl i32 %numSamples, 1
%sub = add i32 %mul, -8
%shr = lshr i32 %sub, 3
- %vecSrcB.0.in102 = bitcast i16* %pSrcB to <8 x i16>*
- %vecSrcB.0103 = load <8 x i16>, <8 x i16>* %vecSrcB.0.in102, align 2
- %vecSrcA.0.in104 = bitcast i16* %pSrcA to <8 x i16>*
- %vecSrcA.0105 = load <8 x i16>, <8 x i16>* %vecSrcA.0.in104, align 2
+ %vecSrcB.0.in102 = bitcast ptr %pSrcB to ptr
+ %vecSrcB.0103 = load <8 x i16>, ptr %vecSrcB.0.in102, align 2
+ %vecSrcA.0.in104 = bitcast ptr %pSrcA to ptr
+ %vecSrcA.0105 = load <8 x i16>, ptr %vecSrcA.0.in104, align 2
%cmp106 = icmp eq i32 %shr, 0
br i1 %cmp106, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
%0 = shl i32 %shr, 4
- %scevgep = getelementptr i16, i16* %pSrcA, i32 %0
+ %scevgep = getelementptr i16, ptr %pSrcA, i32 %0
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
%vecSrcA.0115 = phi <8 x i16> [ %vecSrcA.0, %while.body ], [ %vecSrcA.0105, %while.body.preheader ]
%vecSrcB.0114 = phi <8 x i16> [ %vecSrcB.0, %while.body ], [ %vecSrcB.0103, %while.body.preheader ]
- %vecSrcB.0.in.in113 = phi i16* [ %add.ptr3, %while.body ], [ %pSrcB, %while.body.preheader ]
- %vecSrcA.0.in.in112 = phi i16* [ %add.ptr2, %while.body ], [ %pSrcA, %while.body.preheader ]
+ %vecSrcB.0.in.in113 = phi ptr [ %add.ptr3, %while.body ], [ %pSrcB, %while.body.preheader ]
+ %vecSrcA.0.in.in112 = phi ptr [ %add.ptr2, %while.body ], [ %pSrcA, %while.body.preheader ]
%accImag.0.off32111 = phi i32 [ %15, %while.body ], [ 0, %while.body.preheader ]
%accImag.0.off0110 = phi i32 [ %16, %while.body ], [ 0, %while.body.preheader ]
%accReal.0.off32109 = phi i32 [ %12, %while.body ], [ 0, %while.body.preheader ]
%accReal.0.off0108 = phi i32 [ %13, %while.body ], [ 0, %while.body.preheader ]
%blkCnt.0107 = phi i32 [ %dec, %while.body ], [ %shr, %while.body.preheader ]
- %pSrcB.addr.0 = getelementptr inbounds i16, i16* %vecSrcB.0.in.in113, i32 8
- %pSrcA.addr.0 = getelementptr inbounds i16, i16* %vecSrcA.0.in.in112, i32 8
+ %pSrcB.addr.0 = getelementptr inbounds i16, ptr %vecSrcB.0.in.in113, i32 8
+ %pSrcA.addr.0 = getelementptr inbounds i16, ptr %vecSrcA.0.in.in112, i32 8
%1 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 %accReal.0.off0108, i32 %accReal.0.off32109, <8 x i16> %vecSrcA.0115, <8 x i16> %vecSrcB.0114)
%2 = extractvalue { i32, i32 } %1, 1
%3 = extractvalue { i32, i32 } %1, 0
- %4 = bitcast i16* %pSrcA.addr.0 to <8 x i16>*
- %5 = load <8 x i16>, <8 x i16>* %4, align 2
- %add.ptr2 = getelementptr inbounds i16, i16* %vecSrcA.0.in.in112, i32 16
+ %4 = bitcast ptr %pSrcA.addr.0 to ptr
+ %5 = load <8 x i16>, ptr %4, align 2
+ %add.ptr2 = getelementptr inbounds i16, ptr %vecSrcA.0.in.in112, i32 16
%6 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 1, i32 %accImag.0.off0110, i32 %accImag.0.off32111, <8 x i16> %vecSrcA.0115, <8 x i16> %vecSrcB.0114)
%7 = extractvalue { i32, i32 } %6, 1
%8 = extractvalue { i32, i32 } %6, 0
- %9 = bitcast i16* %pSrcB.addr.0 to <8 x i16>*
- %10 = load <8 x i16>, <8 x i16>* %9, align 2
- %add.ptr3 = getelementptr inbounds i16, i16* %vecSrcB.0.in.in113, i32 16
+ %9 = bitcast ptr %pSrcB.addr.0 to ptr
+ %10 = load <8 x i16>, ptr %9, align 2
+ %add.ptr3 = getelementptr inbounds i16, ptr %vecSrcB.0.in.in113, i32 16
%11 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 %3, i32 %2, <8 x i16> %5, <8 x i16> %10)
%12 = extractvalue { i32, i32 } %11, 1
%13 = extractvalue { i32, i32 } %11, 0
@@ -172,15 +172,15 @@ while.body: ; preds = %while.body.preheade
%15 = extractvalue { i32, i32 } %14, 1
%16 = extractvalue { i32, i32 } %14, 0
%dec = add nsw i32 %blkCnt.0107, -1
- %vecSrcB.0.in = bitcast i16* %add.ptr3 to <8 x i16>*
- %vecSrcB.0 = load <8 x i16>, <8 x i16>* %vecSrcB.0.in, align 2
- %vecSrcA.0.in = bitcast i16* %add.ptr2 to <8 x i16>*
- %vecSrcA.0 = load <8 x i16>, <8 x i16>* %vecSrcA.0.in, align 2
+ %vecSrcB.0.in = bitcast ptr %add.ptr3 to ptr
+ %vecSrcB.0 = load <8 x i16>, ptr %vecSrcB.0.in, align 2
+ %vecSrcA.0.in = bitcast ptr %add.ptr2 to ptr
+ %vecSrcA.0 = load <8 x i16>, ptr %vecSrcA.0.in, align 2
%cmp = icmp eq i32 %dec, 0
br i1 %cmp, label %while.cond.while.end_crit_edge, label %while.body
while.cond.while.end_crit_edge: ; preds = %while.body
- %scevgep136 = getelementptr i16, i16* %pSrcB, i32 %0
+ %scevgep136 = getelementptr i16, ptr %pSrcB, i32 %0
br label %while.end
while.end: ; preds = %while.cond.while.end_crit_edge, %entry
@@ -188,8 +188,8 @@ while.end: ; preds = %while.cond.while.en
%accReal.0.off32.lcssa = phi i32 [ %12, %while.cond.while.end_crit_edge ], [ 0, %entry ]
%accImag.0.off0.lcssa = phi i32 [ %16, %while.cond.while.end_crit_edge ], [ 0, %entry ]
%accImag.0.off32.lcssa = phi i32 [ %15, %while.cond.while.end_crit_edge ], [ 0, %entry ]
- %vecSrcA.0.in.in.lcssa = phi i16* [ %scevgep, %while.cond.while.end_crit_edge ], [ %pSrcA, %entry ]
- %vecSrcB.0.in.in.lcssa = phi i16* [ %scevgep136, %while.cond.while.end_crit_edge ], [ %pSrcB, %entry ]
+ %vecSrcA.0.in.in.lcssa = phi ptr [ %scevgep, %while.cond.while.end_crit_edge ], [ %pSrcA, %entry ]
+ %vecSrcB.0.in.in.lcssa = phi ptr [ %scevgep136, %while.cond.while.end_crit_edge ], [ %pSrcB, %entry ]
%vecSrcB.0.lcssa = phi <8 x i16> [ %vecSrcB.0, %while.cond.while.end_crit_edge ], [ %vecSrcB.0103, %entry ]
%vecSrcA.0.lcssa = phi <8 x i16> [ %vecSrcA.0, %while.cond.while.end_crit_edge ], [ %vecSrcA.0105, %entry ]
%17 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 %accReal.0.off0.lcssa, i32 %accReal.0.off32.lcssa, <8 x i16> %vecSrcA.0.lcssa, <8 x i16> %vecSrcB.0.lcssa)
@@ -215,19 +215,19 @@ while.end: ; preds = %while.cond.while.en
br i1 %cmp1095, label %while.end34, label %while.body11
while.body11: ; preds = %while.end, %while.body11
- %pSrcA.addr.1100 = phi i16* [ %incdec.ptr12, %while.body11 ], [ %vecSrcA.0.in.in.lcssa, %while.end ]
- %pSrcB.addr.199 = phi i16* [ %incdec.ptr14, %while.body11 ], [ %vecSrcB.0.in.in.lcssa, %while.end ]
+ %pSrcA.addr.1100 = phi ptr [ %incdec.ptr12, %while.body11 ], [ %vecSrcA.0.in.in.lcssa, %while.end ]
+ %pSrcB.addr.199 = phi ptr [ %incdec.ptr14, %while.body11 ], [ %vecSrcB.0.in.in.lcssa, %while.end ]
%accImag.198 = phi i64 [ %add32, %while.body11 ], [ %30, %while.end ]
%accReal.197 = phi i64 [ %sub27, %while.body11 ], [ %23, %while.end ]
%blkCnt.196 = phi i32 [ %dec33, %while.body11 ], [ %shr8, %while.end ]
- %incdec.ptr = getelementptr inbounds i16, i16* %pSrcA.addr.1100, i32 1
- %31 = load i16, i16* %pSrcA.addr.1100, align 2
- %incdec.ptr12 = getelementptr inbounds i16, i16* %pSrcA.addr.1100, i32 2
- %32 = load i16, i16* %incdec.ptr, align 2
- %incdec.ptr13 = getelementptr inbounds i16, i16* %pSrcB.addr.199, i32 1
- %33 = load i16, i16* %pSrcB.addr.199, align 2
- %incdec.ptr14 = getelementptr inbounds i16, i16* %pSrcB.addr.199, i32 2
- %34 = load i16, i16* %incdec.ptr13, align 2
+ %incdec.ptr = getelementptr inbounds i16, ptr %pSrcA.addr.1100, i32 1
+ %31 = load i16, ptr %pSrcA.addr.1100, align 2
+ %incdec.ptr12 = getelementptr inbounds i16, ptr %pSrcA.addr.1100, i32 2
+ %32 = load i16, ptr %incdec.ptr, align 2
+ %incdec.ptr13 = getelementptr inbounds i16, ptr %pSrcB.addr.199, i32 1
+ %33 = load i16, ptr %pSrcB.addr.199, align 2
+ %incdec.ptr14 = getelementptr inbounds i16, ptr %pSrcB.addr.199, i32 2
+ %34 = load i16, ptr %incdec.ptr13, align 2
%conv = sext i16 %31 to i32
%conv15 = sext i16 %33 to i32
%mul16 = mul nsw i32 %conv15, %conv
@@ -258,13 +258,13 @@ while.end34.loopexit: ; preds = %while.body11
while.end34: ; preds = %while.end34.loopexit, %while.end
%accReal.1.lcssa.off6 = phi i32 [ %extract.t, %while.end ], [ %extract.t128, %while.end34.loopexit ]
%accImag.1.lcssa.off6 = phi i32 [ %extract.t130, %while.end ], [ %extract.t132, %while.end34.loopexit ]
- store i32 %accReal.1.lcssa.off6, i32* %realResult, align 4
- store i32 %accImag.1.lcssa.off6, i32* %imagResult, align 4
+ store i32 %accReal.1.lcssa.off6, ptr %realResult, align 4
+ store i32 %accImag.1.lcssa.off6, ptr %imagResult, align 4
ret void
}
-define void @fma8(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %n) {
+define void @fma8(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n) {
; CHECK-LABEL: fma8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
@@ -335,18 +335,18 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds float, float* %A, i32 %index
- %1 = bitcast float* %0 to <8 x float>*
- %wide.load = load <8 x float>, <8 x float>* %1, align 4
- %2 = getelementptr inbounds float, float* %B, i32 %index
- %3 = bitcast float* %2 to <8 x float>*
- %wide.load10 = load <8 x float>, <8 x float>* %3, align 4
+ %0 = getelementptr inbounds float, ptr %A, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <8 x float>, ptr %1, align 4
+ %2 = getelementptr inbounds float, ptr %B, i32 %index
+ %3 = bitcast ptr %2 to ptr
+ %wide.load10 = load <8 x float>, ptr %3, align 4
%4 = fmul fast <8 x float> %wide.load10, %wide.load
- %5 = getelementptr inbounds float, float* %C, i32 %index
- %6 = bitcast float* %5 to <8 x float>*
- %wide.load11 = load <8 x float>, <8 x float>* %6, align 4
+ %5 = getelementptr inbounds float, ptr %C, i32 %index
+ %6 = bitcast ptr %5 to ptr
+ %wide.load11 = load <8 x float>, ptr %6, align 4
%7 = fadd fast <8 x float> %wide.load11, %4
- store <8 x float> %7, <8 x float>* %6, align 4
+ store <8 x float> %7, ptr %6, align 4
%index.next = add i32 %index, 8
%8 = icmp eq i32 %index.next, %n.vec
br i1 %8, label %middle.block, label %vector.body
@@ -360,15 +360,15 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
for.body: ; preds = %for.body.preheader12, %for.body
%i.09 = phi i32 [ %inc, %for.body ], [ %i.09.ph, %for.body.preheader12 ]
- %arrayidx = getelementptr inbounds float, float* %A, i32 %i.09
- %9 = load float, float* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.09
- %10 = load float, float* %arrayidx1, align 4
+ %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.09
+ %9 = load float, ptr %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.09
+ %10 = load float, ptr %arrayidx1, align 4
%mul = fmul fast float %10, %9
- %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.09
- %11 = load float, float* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %C, i32 %i.09
+ %11 = load float, ptr %arrayidx2, align 4
%add = fadd fast float %11, %mul
- store float %add, float* %arrayidx2, align 4
+ store float %add, ptr %arrayidx2, align 4
%inc = add nuw nsw i32 %i.09, 1
%exitcond = icmp eq i32 %inc, %n
br i1 %exitcond, label %for.cond.cleanup, label %for.body
diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
index 2aa183c31bab5..82a186bcc73d6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
@@ -4,7 +4,7 @@
; Check some LSR loop postinc
; fma loop with a destination that is the same as one of the sources
-define void @fma(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %n) {
+define void @fma(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n) {
; CHECK-LABEL: fma:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
@@ -70,19 +70,19 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds float, float* %A, i32 %index
- %1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>, <4 x float>* %1, align 4
- %2 = getelementptr inbounds float, float* %B, i32 %index
- %3 = bitcast float* %2 to <4 x float>*
- %wide.load10 = load <4 x float>, <4 x float>* %3, align 4
+ %0 = getelementptr inbounds float, ptr %A, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x float>, ptr %1, align 4
+ %2 = getelementptr inbounds float, ptr %B, i32 %index
+ %3 = bitcast ptr %2 to ptr
+ %wide.load10 = load <4 x float>, ptr %3, align 4
%4 = fmul fast <4 x float> %wide.load10, %wide.load
- %5 = getelementptr inbounds float, float* %C, i32 %index
- %6 = bitcast float* %5 to <4 x float>*
- %wide.load11 = load <4 x float>, <4 x float>* %6, align 4
+ %5 = getelementptr inbounds float, ptr %C, i32 %index
+ %6 = bitcast ptr %5 to ptr
+ %wide.load11 = load <4 x float>, ptr %6, align 4
%7 = fadd fast <4 x float> %wide.load11, %4
- %8 = bitcast float* %5 to <4 x float>*
- store <4 x float> %7, <4 x float>* %8, align 4
+ %8 = bitcast ptr %5 to ptr
+ store <4 x float> %7, ptr %8, align 4
%index.next = add i32 %index, 4
%9 = icmp eq i32 %index.next, %n.vec
br i1 %9, label %middle.block, label %vector.body
@@ -96,15 +96,15 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
for.body: ; preds = %for.body.preheader12, %for.body
%i.09 = phi i32 [ %inc, %for.body ], [ %i.09.ph, %for.body.preheader12 ]
- %arrayidx = getelementptr inbounds float, float* %A, i32 %i.09
- %10 = load float, float* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.09
- %11 = load float, float* %arrayidx1, align 4
+ %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.09
+ %10 = load float, ptr %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.09
+ %11 = load float, ptr %arrayidx1, align 4
%mul = fmul fast float %11, %10
- %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.09
- %12 = load float, float* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %C, i32 %i.09
+ %12 = load float, ptr %arrayidx2, align 4
%add = fadd fast float %12, %mul
- store float %add, float* %arrayidx2, align 4
+ store float %add, ptr %arrayidx2, align 4
%inc = add nuw nsw i32 %i.09, 1
%exitcond = icmp eq i32 %inc, %n
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -113,7 +113,7 @@ for.body: ; preds = %for.body.preheader1
; Same as above but tail predicated
; FIXME: The postinc here is put on the load, not the store. An extra mov is needed in the loop because of it.
-define void @fma_tailpred(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %n) {
+define void @fma_tailpred(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n) {
; CHECK-LABEL: fma_tailpred:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
@@ -173,20 +173,20 @@ vector.body: ; preds = %vector.body, %vecto
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = or <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
- %0 = getelementptr inbounds float, float* %A, i32 %index
+ %0 = getelementptr inbounds float, ptr %A, i32 %index
%1 = icmp ule <4 x i32> %induction, %broadcast.splat11
- %2 = bitcast float* %0 to <4 x float>*
- %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef)
- %3 = getelementptr inbounds float, float* %B, i32 %index
- %4 = bitcast float* %3 to <4 x float>*
- %wide.masked.load12 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %4, i32 4, <4 x i1> %1, <4 x float> undef)
+ %2 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x float> undef)
+ %3 = getelementptr inbounds float, ptr %B, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ %wide.masked.load12 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %4, i32 4, <4 x i1> %1, <4 x float> undef)
%5 = fmul fast <4 x float> %wide.masked.load12, %wide.masked.load
- %6 = getelementptr inbounds float, float* %C, i32 %index
- %7 = bitcast float* %6 to <4 x float>*
- %wide.masked.load13 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %7, i32 4, <4 x i1> %1, <4 x float> undef)
+ %6 = getelementptr inbounds float, ptr %C, i32 %index
+ %7 = bitcast ptr %6 to ptr
+ %wide.masked.load13 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %7, i32 4, <4 x i1> %1, <4 x float> undef)
%8 = fadd fast <4 x float> %wide.masked.load13, %5
- %9 = bitcast float* %6 to <4 x float>*
- call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %8, <4 x float>* %9, i32 4, <4 x i1> %1)
+ %9 = bitcast ptr %6 to ptr
+ call void @llvm.masked.store.v4f32.p0(<4 x float> %8, ptr %9, i32 4, <4 x i1> %1)
%index.next = add i32 %index, 4
%10 = icmp eq i32 %index.next, %n.vec
br i1 %10, label %for.cond.cleanup, label %vector.body
@@ -197,7 +197,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
; Multiple loads of the loop with a common base
-define i8* @test(i8* nocapture readonly %input_row, i8* nocapture readonly %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32 %col_offset, i16 signext %activation_min, i16 zeroext %row_len, i32* nocapture readonly %bias, i8* returned %out) {
+define ptr @test(ptr nocapture readonly %input_row, ptr nocapture readonly %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32 %col_offset, i16 signext %activation_min, i16 zeroext %row_len, ptr nocapture readonly %bias, ptr returned %out) {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -291,11 +291,11 @@ for.cond.preheader: ; preds = %entry
for.body.lr.ph: ; preds = %for.cond.preheader
%conv5 = zext i16 %row_len to i32
- %add.ptr9 = getelementptr inbounds i8, i8* %input_col, i32 %conv5
+ %add.ptr9 = getelementptr inbounds i8, ptr %input_col, i32 %conv5
%mul11 = shl nuw nsw i32 %conv5, 1
- %add.ptr12 = getelementptr inbounds i8, i8* %input_col, i32 %mul11
+ %add.ptr12 = getelementptr inbounds i8, ptr %input_col, i32 %mul11
%mul14 = mul nuw nsw i32 %conv5, 3
- %add.ptr15 = getelementptr inbounds i8, i8* %input_col, i32 %mul14
+ %add.ptr15 = getelementptr inbounds i8, ptr %input_col, i32 %mul14
%add = add nuw nsw i32 %conv5, 7
%div = lshr i32 %add, 3
%conv25 = trunc i32 %col_offset to i16
@@ -306,14 +306,14 @@ for.body.lr.ph: ; preds = %for.cond.preheader
for.body: ; preds = %for.cond.cleanup23, %for.body.lr.ph
%i_out_ch.0116 = phi i32 [ 0, %for.body.lr.ph ], [ %inc37, %for.cond.cleanup23 ]
%i_row_loop.0115 = phi i32 [ undef, %for.body.lr.ph ], [ %i_row_loop.1.lcssa, %for.cond.cleanup23 ]
- %arrayidx = getelementptr inbounds i32, i32* %bias, i32 %i_out_ch.0116
- %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %bias, i32 %i_out_ch.0116
+ %0 = load i32, ptr %arrayidx, align 4
%cmp2199 = icmp slt i32 %i_row_loop.0115, %div
br i1 %cmp2199, label %for.body24.preheader, label %for.cond.cleanup23
for.body24.preheader: ; preds = %for.body
%mul = mul nuw nsw i32 %i_out_ch.0116, %conv5
- %add.ptr = getelementptr inbounds i8, i8* %input_row, i32 %mul
+ %add.ptr = getelementptr inbounds i8, ptr %input_row, i32 %mul
br label %for.body24
for.cond.cleanup23: ; preds = %for.body24, %for.body
@@ -326,46 +326,46 @@ for.cond.cleanup23: ; preds = %for.body24, %for.bo
%add32 = add nsw i32 %add31, %acc_2.0.lcssa
%add33 = add nsw i32 %add32, %acc_3.0.lcssa
%conv34 = trunc i32 %add33 to i8
- %arrayidx35 = getelementptr inbounds i8, i8* %out, i32 %i_out_ch.0116
- store i8 %conv34, i8* %arrayidx35, align 1
+ %arrayidx35 = getelementptr inbounds i8, ptr %out, i32 %i_out_ch.0116
+ store i8 %conv34, ptr %arrayidx35, align 1
%inc37 = add nuw nsw i32 %i_out_ch.0116, 1
%exitcond120 = icmp eq i32 %inc37, %conv2
br i1 %exitcond120, label %if.end, label %for.body
for.body24: ; preds = %for.body24, %for.body24.preheader
- %ip_r0.0109 = phi i8* [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ]
- %ip_c0.0108 = phi i8* [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ]
- %ip_c1.0107 = phi i8* [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ]
- %ip_c2.0106 = phi i8* [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ]
+ %ip_r0.0109 = phi ptr [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ]
+ %ip_c0.0108 = phi ptr [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ]
+ %ip_c1.0107 = phi ptr [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ]
+ %ip_c2.0106 = phi ptr [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ]
%i_row_loop.1105 = phi i32 [ %inc, %for.body24 ], [ %i_row_loop.0115, %for.body24.preheader ]
- %ip_c3.0104 = phi i8* [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ]
+ %ip_c3.0104 = phi ptr [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ]
%acc_3.0103 = phi i32 [ %23, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_2.0102 = phi i32 [ %22, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_1.0101 = phi i32 [ %21, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_0.0100 = phi i32 [ %20, %for.body24 ], [ %0, %for.body24.preheader ]
- %1 = bitcast i8* %ip_r0.0109 to <8 x i8>*
- %2 = load <8 x i8>, <8 x i8>* %1, align 1
+ %1 = bitcast ptr %ip_r0.0109 to ptr
+ %2 = load <8 x i8>, ptr %1, align 1
%3 = sext <8 x i8> %2 to <8 x i16>
- %add.ptr26 = getelementptr inbounds i8, i8* %ip_r0.0109, i32 8
- %4 = bitcast i8* %ip_c0.0108 to <8 x i8>*
- %5 = load <8 x i8>, <8 x i8>* %4, align 1
+ %add.ptr26 = getelementptr inbounds i8, ptr %ip_r0.0109, i32 8
+ %4 = bitcast ptr %ip_c0.0108 to ptr
+ %5 = load <8 x i8>, ptr %4, align 1
%6 = sext <8 x i8> %5 to <8 x i16>
- %add.ptr27 = getelementptr inbounds i8, i8* %ip_c0.0108, i32 8
+ %add.ptr27 = getelementptr inbounds i8, ptr %ip_c0.0108, i32 8
%7 = add <8 x i16> %.splat, %6
- %8 = bitcast i8* %ip_c1.0107 to <8 x i8>*
- %9 = load <8 x i8>, <8 x i8>* %8, align 1
+ %8 = bitcast ptr %ip_c1.0107 to ptr
+ %9 = load <8 x i8>, ptr %8, align 1
%10 = sext <8 x i8> %9 to <8 x i16>
- %add.ptr28 = getelementptr inbounds i8, i8* %ip_c1.0107, i32 8
+ %add.ptr28 = getelementptr inbounds i8, ptr %ip_c1.0107, i32 8
%11 = add <8 x i16> %.splat, %10
- %12 = bitcast i8* %ip_c2.0106 to <8 x i8>*
- %13 = load <8 x i8>, <8 x i8>* %12, align 1
+ %12 = bitcast ptr %ip_c2.0106 to ptr
+ %13 = load <8 x i8>, ptr %12, align 1
%14 = sext <8 x i8> %13 to <8 x i16>
- %add.ptr29 = getelementptr inbounds i8, i8* %ip_c2.0106, i32 8
+ %add.ptr29 = getelementptr inbounds i8, ptr %ip_c2.0106, i32 8
%15 = add <8 x i16> %.splat, %14
- %16 = bitcast i8* %ip_c3.0104 to <8 x i8>*
- %17 = load <8 x i8>, <8 x i8>* %16, align 1
+ %16 = bitcast ptr %ip_c3.0104 to ptr
+ %17 = load <8 x i8>, ptr %16, align 1
%18 = sext <8 x i8> %17 to <8 x i16>
- %add.ptr30 = getelementptr inbounds i8, i8* %ip_c3.0104, i32 8
+ %add.ptr30 = getelementptr inbounds i8, ptr %ip_c3.0104, i32 8
%19 = add <8 x i16> %.splat, %18
%20 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_0.0100, <8 x i16> %3, <8 x i16> %7)
%21 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_1.0101, <8 x i16> %3, <8 x i16> %11)
@@ -376,11 +376,11 @@ for.body24: ; preds = %for.body24, %for.bo
br i1 %exitcond, label %for.cond.cleanup23, label %for.body24
if.end: ; preds = %for.cond.cleanup23, %for.cond.preheader, %entry
- ret i8* %out
+ ret ptr %out
}
; Same as above with optsize
-define i8* @test_optsize(i8* nocapture readonly %input_row, i8* nocapture readonly %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32 %col_offset, i16 signext %activation_min, i16 zeroext %row_len, i32* nocapture readonly %bias, i8* returned %out) optsize {
+define ptr @test_optsize(ptr nocapture readonly %input_row, ptr nocapture readonly %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32 %col_offset, i16 signext %activation_min, i16 zeroext %row_len, ptr nocapture readonly %bias, ptr returned %out) optsize {
; CHECK-LABEL: test_optsize:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -473,11 +473,11 @@ for.cond.preheader: ; preds = %entry
for.body.lr.ph: ; preds = %for.cond.preheader
%conv5 = zext i16 %row_len to i32
- %add.ptr9 = getelementptr inbounds i8, i8* %input_col, i32 %conv5
+ %add.ptr9 = getelementptr inbounds i8, ptr %input_col, i32 %conv5
%mul11 = shl nuw nsw i32 %conv5, 1
- %add.ptr12 = getelementptr inbounds i8, i8* %input_col, i32 %mul11
+ %add.ptr12 = getelementptr inbounds i8, ptr %input_col, i32 %mul11
%mul14 = mul nuw nsw i32 %conv5, 3
- %add.ptr15 = getelementptr inbounds i8, i8* %input_col, i32 %mul14
+ %add.ptr15 = getelementptr inbounds i8, ptr %input_col, i32 %mul14
%add = add nuw nsw i32 %conv5, 7
%div = lshr i32 %add, 3
%conv25 = trunc i32 %col_offset to i16
@@ -488,14 +488,14 @@ for.body.lr.ph: ; preds = %for.cond.preheader
for.body: ; preds = %for.cond.cleanup23, %for.body.lr.ph
%i_out_ch.0116 = phi i32 [ 0, %for.body.lr.ph ], [ %inc37, %for.cond.cleanup23 ]
%i_row_loop.0115 = phi i32 [ undef, %for.body.lr.ph ], [ %i_row_loop.1.lcssa, %for.cond.cleanup23 ]
- %arrayidx = getelementptr inbounds i32, i32* %bias, i32 %i_out_ch.0116
- %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %bias, i32 %i_out_ch.0116
+ %0 = load i32, ptr %arrayidx, align 4
%cmp2199 = icmp slt i32 %i_row_loop.0115, %div
br i1 %cmp2199, label %for.body24.preheader, label %for.cond.cleanup23
for.body24.preheader: ; preds = %for.body
%mul = mul nuw nsw i32 %i_out_ch.0116, %conv5
- %add.ptr = getelementptr inbounds i8, i8* %input_row, i32 %mul
+ %add.ptr = getelementptr inbounds i8, ptr %input_row, i32 %mul
br label %for.body24
for.cond.cleanup23: ; preds = %for.body24, %for.body
@@ -508,46 +508,46 @@ for.cond.cleanup23: ; preds = %for.body24, %for.bo
%add32 = add nsw i32 %add31, %acc_2.0.lcssa
%add33 = add nsw i32 %add32, %acc_3.0.lcssa
%conv34 = trunc i32 %add33 to i8
- %arrayidx35 = getelementptr inbounds i8, i8* %out, i32 %i_out_ch.0116
- store i8 %conv34, i8* %arrayidx35, align 1
+ %arrayidx35 = getelementptr inbounds i8, ptr %out, i32 %i_out_ch.0116
+ store i8 %conv34, ptr %arrayidx35, align 1
%inc37 = add nuw nsw i32 %i_out_ch.0116, 1
%exitcond120 = icmp eq i32 %inc37, %conv2
br i1 %exitcond120, label %if.end, label %for.body
for.body24: ; preds = %for.body24, %for.body24.preheader
- %ip_r0.0109 = phi i8* [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ]
- %ip_c0.0108 = phi i8* [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ]
- %ip_c1.0107 = phi i8* [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ]
- %ip_c2.0106 = phi i8* [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ]
+ %ip_r0.0109 = phi ptr [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ]
+ %ip_c0.0108 = phi ptr [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ]
+ %ip_c1.0107 = phi ptr [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ]
+ %ip_c2.0106 = phi ptr [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ]
%i_row_loop.1105 = phi i32 [ %inc, %for.body24 ], [ %i_row_loop.0115, %for.body24.preheader ]
- %ip_c3.0104 = phi i8* [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ]
+ %ip_c3.0104 = phi ptr [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ]
%acc_3.0103 = phi i32 [ %23, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_2.0102 = phi i32 [ %22, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_1.0101 = phi i32 [ %21, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_0.0100 = phi i32 [ %20, %for.body24 ], [ %0, %for.body24.preheader ]
- %1 = bitcast i8* %ip_r0.0109 to <8 x i8>*
- %2 = load <8 x i8>, <8 x i8>* %1, align 1
+ %1 = bitcast ptr %ip_r0.0109 to ptr
+ %2 = load <8 x i8>, ptr %1, align 1
%3 = sext <8 x i8> %2 to <8 x i16>
- %add.ptr26 = getelementptr inbounds i8, i8* %ip_r0.0109, i32 8
- %4 = bitcast i8* %ip_c0.0108 to <8 x i8>*
- %5 = load <8 x i8>, <8 x i8>* %4, align 1
+ %add.ptr26 = getelementptr inbounds i8, ptr %ip_r0.0109, i32 8
+ %4 = bitcast ptr %ip_c0.0108 to ptr
+ %5 = load <8 x i8>, ptr %4, align 1
%6 = sext <8 x i8> %5 to <8 x i16>
- %add.ptr27 = getelementptr inbounds i8, i8* %ip_c0.0108, i32 8
+ %add.ptr27 = getelementptr inbounds i8, ptr %ip_c0.0108, i32 8
%7 = add <8 x i16> %.splat, %6
- %8 = bitcast i8* %ip_c1.0107 to <8 x i8>*
- %9 = load <8 x i8>, <8 x i8>* %8, align 1
+ %8 = bitcast ptr %ip_c1.0107 to ptr
+ %9 = load <8 x i8>, ptr %8, align 1
%10 = sext <8 x i8> %9 to <8 x i16>
- %add.ptr28 = getelementptr inbounds i8, i8* %ip_c1.0107, i32 8
+ %add.ptr28 = getelementptr inbounds i8, ptr %ip_c1.0107, i32 8
%11 = add <8 x i16> %.splat, %10
- %12 = bitcast i8* %ip_c2.0106 to <8 x i8>*
- %13 = load <8 x i8>, <8 x i8>* %12, align 1
+ %12 = bitcast ptr %ip_c2.0106 to ptr
+ %13 = load <8 x i8>, ptr %12, align 1
%14 = sext <8 x i8> %13 to <8 x i16>
- %add.ptr29 = getelementptr inbounds i8, i8* %ip_c2.0106, i32 8
+ %add.ptr29 = getelementptr inbounds i8, ptr %ip_c2.0106, i32 8
%15 = add <8 x i16> %.splat, %14
- %16 = bitcast i8* %ip_c3.0104 to <8 x i8>*
- %17 = load <8 x i8>, <8 x i8>* %16, align 1
+ %16 = bitcast ptr %ip_c3.0104 to ptr
+ %17 = load <8 x i8>, ptr %16, align 1
%18 = sext <8 x i8> %17 to <8 x i16>
- %add.ptr30 = getelementptr inbounds i8, i8* %ip_c3.0104, i32 8
+ %add.ptr30 = getelementptr inbounds i8, ptr %ip_c3.0104, i32 8
%19 = add <8 x i16> %.splat, %18
%20 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_0.0100, <8 x i16> %3, <8 x i16> %7)
%21 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_1.0101, <8 x i16> %3, <8 x i16> %11)
@@ -558,12 +558,12 @@ for.body24: ; preds = %for.body24, %for.bo
br i1 %exitcond, label %for.cond.cleanup23, label %for.body24
if.end: ; preds = %for.cond.cleanup23, %for.cond.preheader, %entry
- ret i8* %out
+ ret ptr %out
}
; Similar but predicated
-define i32 @arm_nn_mat_mul_core_4x_s8(i32 %row_elements, i32 %offset, i8* %row_base, i8* %col_base, i32* nocapture readnone %sum_col, i32* nocapture %output) {
+define i32 @arm_nn_mat_mul_core_4x_s8(i32 %row_elements, i32 %offset, ptr %row_base, ptr %col_base, ptr nocapture readnone %sum_col, ptr nocapture %output) {
; CHECK-LABEL: arm_nn_mat_mul_core_4x_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r10, lr}
@@ -612,10 +612,10 @@ entry:
for.body.preheader: ; preds = %entry
%mul2 = mul nsw i32 %offset, 3
- %add.ptr3 = getelementptr inbounds i8, i8* %row_base, i32 %mul2
+ %add.ptr3 = getelementptr inbounds i8, ptr %row_base, i32 %mul2
%mul = shl nsw i32 %offset, 1
- %add.ptr1 = getelementptr inbounds i8, i8* %row_base, i32 %mul
- %add.ptr = getelementptr inbounds i8, i8* %row_base, i32 %offset
+ %add.ptr1 = getelementptr inbounds i8, ptr %row_base, i32 %mul
+ %add.ptr = getelementptr inbounds i8, ptr %row_base, i32 %offset
%0 = icmp sgt i32 %div, 1
%smax = select i1 %0, i32 %div, i32 1
br label %for.body
@@ -625,54 +625,54 @@ for.cond.cleanup: ; preds = %for.body, %entry
%acc_n.sroa.9.0.lcssa = phi i32 [ 0, %entry ], [ %12, %for.body ]
%acc_n.sroa.6.0.lcssa = phi i32 [ 0, %entry ], [ %9, %for.body ]
%acc_n.sroa.0.0.lcssa = phi i32 [ 0, %entry ], [ %6, %for.body ]
- store i32 %acc_n.sroa.0.0.lcssa, i32* %output, align 4
- %arrayidx19 = getelementptr inbounds i32, i32* %output, i32 1
- store i32 %acc_n.sroa.6.0.lcssa, i32* %arrayidx19, align 4
- %arrayidx21 = getelementptr inbounds i32, i32* %output, i32 2
- store i32 %acc_n.sroa.9.0.lcssa, i32* %arrayidx21, align 4
- %arrayidx23 = getelementptr inbounds i32, i32* %output, i32 3
- store i32 %acc_n.sroa.12.0.lcssa, i32* %arrayidx23, align 4
+ store i32 %acc_n.sroa.0.0.lcssa, ptr %output, align 4
+ %arrayidx19 = getelementptr inbounds i32, ptr %output, i32 1
+ store i32 %acc_n.sroa.6.0.lcssa, ptr %arrayidx19, align 4
+ %arrayidx21 = getelementptr inbounds i32, ptr %output, i32 2
+ store i32 %acc_n.sroa.9.0.lcssa, ptr %arrayidx21, align 4
+ %arrayidx23 = getelementptr inbounds i32, ptr %output, i32 3
+ store i32 %acc_n.sroa.12.0.lcssa, ptr %arrayidx23, align 4
ret i32 0
for.body: ; preds = %for.body, %for.body.preheader
- %col_base.addr.095 = phi i8* [ %add.ptr4, %for.body ], [ %col_base, %for.body.preheader ]
+ %col_base.addr.095 = phi ptr [ %add.ptr4, %for.body ], [ %col_base, %for.body.preheader ]
%acc_n.sroa.0.094 = phi i32 [ %6, %for.body ], [ 0, %for.body.preheader ]
%acc_n.sroa.6.093 = phi i32 [ %9, %for.body ], [ 0, %for.body.preheader ]
%acc_n.sroa.9.092 = phi i32 [ %12, %for.body ], [ 0, %for.body.preheader ]
%i.091 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%row_elem.090 = phi i32 [ %sub, %for.body ], [ %row_elements, %for.body.preheader ]
%acc_n.sroa.12.089 = phi i32 [ %15, %for.body ], [ 0, %for.body.preheader ]
- %ip_row_3.088 = phi i8* [ %add.ptr15, %for.body ], [ %add.ptr3, %for.body.preheader ]
- %ip_row_2.087 = phi i8* [ %add.ptr14, %for.body ], [ %add.ptr1, %for.body.preheader ]
- %ip_row_1.086 = phi i8* [ %add.ptr13, %for.body ], [ %add.ptr, %for.body.preheader ]
- %ip_row_0.085 = phi i8* [ %add.ptr12, %for.body ], [ %row_base, %for.body.preheader ]
+ %ip_row_3.088 = phi ptr [ %add.ptr15, %for.body ], [ %add.ptr3, %for.body.preheader ]
+ %ip_row_2.087 = phi ptr [ %add.ptr14, %for.body ], [ %add.ptr1, %for.body.preheader ]
+ %ip_row_1.086 = phi ptr [ %add.ptr13, %for.body ], [ %add.ptr, %for.body.preheader ]
+ %ip_row_0.085 = phi ptr [ %add.ptr12, %for.body ], [ %row_base, %for.body.preheader ]
%1 = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 %row_elem.090)
%sub = add nsw i32 %row_elem.090, -16
- %2 = bitcast i8* %col_base.addr.095 to <16 x i8>*
- %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %2, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
- %add.ptr4 = getelementptr inbounds i8, i8* %col_base.addr.095, i32 16
- %4 = bitcast i8* %ip_row_0.085 to <16 x i8>*
- %5 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %4, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
+ %2 = bitcast ptr %col_base.addr.095 to ptr
+ %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %2, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
+ %add.ptr4 = getelementptr inbounds i8, ptr %col_base.addr.095, i32 16
+ %4 = bitcast ptr %ip_row_0.085 to ptr
+ %5 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %4, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
%6 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.0.094, <16 x i8> %5, <16 x i8> %3, <16 x i1> %1)
- %7 = bitcast i8* %ip_row_1.086 to <16 x i8>*
- %8 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %7, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
+ %7 = bitcast ptr %ip_row_1.086 to ptr
+ %8 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %7, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
%9 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.6.093, <16 x i8> %8, <16 x i8> %3, <16 x i1> %1)
- %10 = bitcast i8* %ip_row_2.087 to <16 x i8>*
- %11 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %10, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
+ %10 = bitcast ptr %ip_row_2.087 to ptr
+ %11 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %10, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
%12 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.9.092, <16 x i8> %11, <16 x i8> %3, <16 x i1> %1)
- %13 = bitcast i8* %ip_row_3.088 to <16 x i8>*
- %14 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %13, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
+ %13 = bitcast ptr %ip_row_3.088 to ptr
+ %14 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %13, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
%15 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.12.089, <16 x i8> %14, <16 x i8> %3, <16 x i1> %1)
- %add.ptr12 = getelementptr inbounds i8, i8* %ip_row_0.085, i32 16
- %add.ptr13 = getelementptr inbounds i8, i8* %ip_row_1.086, i32 16
- %add.ptr14 = getelementptr inbounds i8, i8* %ip_row_2.087, i32 16
- %add.ptr15 = getelementptr inbounds i8, i8* %ip_row_3.088, i32 16
+ %add.ptr12 = getelementptr inbounds i8, ptr %ip_row_0.085, i32 16
+ %add.ptr13 = getelementptr inbounds i8, ptr %ip_row_1.086, i32 16
+ %add.ptr14 = getelementptr inbounds i8, ptr %ip_row_2.087, i32 16
+ %add.ptr15 = getelementptr inbounds i8, ptr %ip_row_3.088, i32 16
%inc = add nuw nsw i32 %i.091, 1
%exitcond = icmp eq i32 %inc, %smax
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
-define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32* nocapture readnone %output_shift, i32* nocapture readnone %output_mult, i32 %out_offset, i32 %col_offset, i32 %row_offset, i16 signext %activation_min, i16 signext %activation_max, i16 zeroext %row_len, i32* nocapture readonly %bias, i8* returned %out) {
+define ptr @signext(ptr %input_row, ptr %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, ptr nocapture readnone %output_shift, ptr nocapture readnone %output_mult, i32 %out_offset, i32 %col_offset, i32 %row_offset, i16 signext %activation_min, i16 signext %activation_max, i16 zeroext %row_len, ptr nocapture readonly %bias, ptr returned %out) {
; CHECK-LABEL: signext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -770,11 +770,11 @@ for.cond.preheader: ; preds = %entry
for.body.lr.ph: ; preds = %for.cond.preheader
%conv5 = zext i16 %row_len to i32
- %add.ptr9 = getelementptr inbounds i8, i8* %input_col, i32 %conv5
+ %add.ptr9 = getelementptr inbounds i8, ptr %input_col, i32 %conv5
%mul11 = shl nuw nsw i32 %conv5, 1
- %add.ptr12 = getelementptr inbounds i8, i8* %input_col, i32 %mul11
+ %add.ptr12 = getelementptr inbounds i8, ptr %input_col, i32 %mul11
%mul14 = mul nuw nsw i32 %conv5, 3
- %add.ptr15 = getelementptr inbounds i8, i8* %input_col, i32 %mul14
+ %add.ptr15 = getelementptr inbounds i8, ptr %input_col, i32 %mul14
%add = add nuw nsw i32 %conv5, 7
%div = lshr i32 %add, 3
%conv25 = trunc i32 %col_offset to i16
@@ -785,14 +785,14 @@ for.body.lr.ph: ; preds = %for.cond.preheader
for.body: ; preds = %for.cond.cleanup23, %for.body.lr.ph
%i_out_ch.0129 = phi i32 [ 0, %for.body.lr.ph ], [ %inc37, %for.cond.cleanup23 ]
%i_row_loop.0128 = phi i32 [ undef, %for.body.lr.ph ], [ %i_row_loop.1.lcssa, %for.cond.cleanup23 ]
- %arrayidx = getelementptr inbounds i32, i32* %bias, i32 %i_out_ch.0129
- %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %bias, i32 %i_out_ch.0129
+ %0 = load i32, ptr %arrayidx, align 4
%cmp21111 = icmp slt i32 %i_row_loop.0128, %div
br i1 %cmp21111, label %for.body24.preheader, label %for.cond.cleanup23
for.body24.preheader: ; preds = %for.body
%mul = mul nuw nsw i32 %i_out_ch.0129, %conv5
- %add.ptr = getelementptr inbounds i8, i8* %input_row, i32 %mul
+ %add.ptr = getelementptr inbounds i8, ptr %input_row, i32 %mul
br label %for.body24
for.cond.cleanup23: ; preds = %for.body24, %for.body
@@ -805,49 +805,49 @@ for.cond.cleanup23: ; preds = %for.body24, %for.bo
%add32 = add nsw i32 %add31, %acc_2.0.lcssa
%add33 = add nsw i32 %add32, %acc_3.0.lcssa
%conv34 = trunc i32 %add33 to i8
- %arrayidx35 = getelementptr inbounds i8, i8* %out, i32 %i_out_ch.0129
- store i8 %conv34, i8* %arrayidx35, align 1
+ %arrayidx35 = getelementptr inbounds i8, ptr %out, i32 %i_out_ch.0129
+ store i8 %conv34, ptr %arrayidx35, align 1
%inc37 = add nuw nsw i32 %i_out_ch.0129, 1
%exitcond133 = icmp eq i32 %inc37, %conv2
br i1 %exitcond133, label %if.end, label %for.body
for.body24: ; preds = %for.body24, %for.body24.preheader
%row_len_tmp.0122 = phi i32 [ %sub, %for.body24 ], [ %conv5, %for.body24.preheader ]
- %ip_r0.0121 = phi i8* [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ]
- %ip_c0.0120 = phi i8* [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ]
- %ip_c1.0119 = phi i8* [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ]
- %ip_c2.0118 = phi i8* [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ]
+ %ip_r0.0121 = phi ptr [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ]
+ %ip_c0.0120 = phi ptr [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ]
+ %ip_c1.0119 = phi ptr [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ]
+ %ip_c2.0118 = phi ptr [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ]
%i_row_loop.1117 = phi i32 [ %inc, %for.body24 ], [ %i_row_loop.0128, %for.body24.preheader ]
- %ip_c3.0116 = phi i8* [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ]
+ %ip_c3.0116 = phi ptr [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ]
%acc_3.0115 = phi i32 [ %24, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_2.0114 = phi i32 [ %23, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_1.0113 = phi i32 [ %22, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_0.0112 = phi i32 [ %21, %for.body24 ], [ %0, %for.body24.preheader ]
%1 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %row_len_tmp.0122)
%sub = add nsw i32 %row_len_tmp.0122, -8
- %2 = bitcast i8* %ip_r0.0121 to <8 x i8>*
- %3 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
+ %2 = bitcast ptr %ip_r0.0121 to ptr
+ %3 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %2, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
%4 = sext <8 x i8> %3 to <8 x i16>
- %add.ptr26 = getelementptr inbounds i8, i8* %ip_r0.0121, i32 8
- %5 = bitcast i8* %ip_c0.0120 to <8 x i8>*
- %6 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %5, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
+ %add.ptr26 = getelementptr inbounds i8, ptr %ip_r0.0121, i32 8
+ %5 = bitcast ptr %ip_c0.0120 to ptr
+ %6 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %5, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
%7 = sext <8 x i8> %6 to <8 x i16>
- %add.ptr27 = getelementptr inbounds i8, i8* %ip_c0.0120, i32 8
+ %add.ptr27 = getelementptr inbounds i8, ptr %ip_c0.0120, i32 8
%8 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %7, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef)
- %9 = bitcast i8* %ip_c1.0119 to <8 x i8>*
- %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %9, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
+ %9 = bitcast ptr %ip_c1.0119 to ptr
+ %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %9, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
%11 = sext <8 x i8> %10 to <8 x i16>
- %add.ptr28 = getelementptr inbounds i8, i8* %ip_c1.0119, i32 8
+ %add.ptr28 = getelementptr inbounds i8, ptr %ip_c1.0119, i32 8
%12 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %11, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef)
- %13 = bitcast i8* %ip_c2.0118 to <8 x i8>*
- %14 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %13, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
+ %13 = bitcast ptr %ip_c2.0118 to ptr
+ %14 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %13, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
%15 = sext <8 x i8> %14 to <8 x i16>
- %add.ptr29 = getelementptr inbounds i8, i8* %ip_c2.0118, i32 8
+ %add.ptr29 = getelementptr inbounds i8, ptr %ip_c2.0118, i32 8
%16 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %15, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef)
- %17 = bitcast i8* %ip_c3.0116 to <8 x i8>*
- %18 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %17, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
+ %17 = bitcast ptr %ip_c3.0116 to ptr
+ %18 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %17, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
%19 = sext <8 x i8> %18 to <8 x i16>
- %add.ptr30 = getelementptr inbounds i8, i8* %ip_c3.0116, i32 8
+ %add.ptr30 = getelementptr inbounds i8, ptr %ip_c3.0116, i32 8
%20 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %19, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef)
%21 = tail call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 %acc_0.0112, <8 x i16> %4, <8 x i16> %8, <8 x i1> %1)
%22 = tail call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 %acc_1.0113, <8 x i16> %4, <8 x i16> %12, <8 x i1> %1)
@@ -858,10 +858,10 @@ for.body24: ; preds = %for.body24, %for.bo
br i1 %exitcond, label %for.cond.cleanup23, label %for.body24
if.end: ; preds = %for.cond.cleanup23, %for.cond.preheader, %entry
- ret i8* %out
+ ret ptr %out
}
-define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32* nocapture readnone %output_shift, i32* nocapture readnone %output_mult, i32 %out_offset, i32 %col_offset, i32 %row_offset, i16 signext %activation_min, i16 signext %activation_max, i16 zeroext %row_len, i32* nocapture readonly %bias, i8* returned %out) optsize {
+define ptr @signext_optsize(ptr %input_row, ptr %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, ptr nocapture readnone %output_shift, ptr nocapture readnone %output_mult, i32 %out_offset, i32 %col_offset, i32 %row_offset, i16 signext %activation_min, i16 signext %activation_max, i16 zeroext %row_len, ptr nocapture readonly %bias, ptr returned %out) optsize {
; CHECK-LABEL: signext_optsize:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -958,11 +958,11 @@ for.cond.preheader: ; preds = %entry
for.body.lr.ph: ; preds = %for.cond.preheader
%conv5 = zext i16 %row_len to i32
- %add.ptr9 = getelementptr inbounds i8, i8* %input_col, i32 %conv5
+ %add.ptr9 = getelementptr inbounds i8, ptr %input_col, i32 %conv5
%mul11 = shl nuw nsw i32 %conv5, 1
- %add.ptr12 = getelementptr inbounds i8, i8* %input_col, i32 %mul11
+ %add.ptr12 = getelementptr inbounds i8, ptr %input_col, i32 %mul11
%mul14 = mul nuw nsw i32 %conv5, 3
- %add.ptr15 = getelementptr inbounds i8, i8* %input_col, i32 %mul14
+ %add.ptr15 = getelementptr inbounds i8, ptr %input_col, i32 %mul14
%add = add nuw nsw i32 %conv5, 7
%div = lshr i32 %add, 3
%conv25 = trunc i32 %col_offset to i16
@@ -973,14 +973,14 @@ for.body.lr.ph: ; preds = %for.cond.preheader
for.body: ; preds = %for.cond.cleanup23, %for.body.lr.ph
%i_out_ch.0129 = phi i32 [ 0, %for.body.lr.ph ], [ %inc37, %for.cond.cleanup23 ]
%i_row_loop.0128 = phi i32 [ undef, %for.body.lr.ph ], [ %i_row_loop.1.lcssa, %for.cond.cleanup23 ]
- %arrayidx = getelementptr inbounds i32, i32* %bias, i32 %i_out_ch.0129
- %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %bias, i32 %i_out_ch.0129
+ %0 = load i32, ptr %arrayidx, align 4
%cmp21111 = icmp slt i32 %i_row_loop.0128, %div
br i1 %cmp21111, label %for.body24.preheader, label %for.cond.cleanup23
for.body24.preheader: ; preds = %for.body
%mul = mul nuw nsw i32 %i_out_ch.0129, %conv5
- %add.ptr = getelementptr inbounds i8, i8* %input_row, i32 %mul
+ %add.ptr = getelementptr inbounds i8, ptr %input_row, i32 %mul
br label %for.body24
for.cond.cleanup23: ; preds = %for.body24, %for.body
@@ -993,49 +993,49 @@ for.cond.cleanup23: ; preds = %for.body24, %for.bo
%add32 = add nsw i32 %add31, %acc_2.0.lcssa
%add33 = add nsw i32 %add32, %acc_3.0.lcssa
%conv34 = trunc i32 %add33 to i8
- %arrayidx35 = getelementptr inbounds i8, i8* %out, i32 %i_out_ch.0129
- store i8 %conv34, i8* %arrayidx35, align 1
+ %arrayidx35 = getelementptr inbounds i8, ptr %out, i32 %i_out_ch.0129
+ store i8 %conv34, ptr %arrayidx35, align 1
%inc37 = add nuw nsw i32 %i_out_ch.0129, 1
%exitcond133 = icmp eq i32 %inc37, %conv2
br i1 %exitcond133, label %if.end, label %for.body
for.body24: ; preds = %for.body24, %for.body24.preheader
%row_len_tmp.0122 = phi i32 [ %sub, %for.body24 ], [ %conv5, %for.body24.preheader ]
- %ip_r0.0121 = phi i8* [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ]
- %ip_c0.0120 = phi i8* [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ]
- %ip_c1.0119 = phi i8* [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ]
- %ip_c2.0118 = phi i8* [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ]
+ %ip_r0.0121 = phi ptr [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ]
+ %ip_c0.0120 = phi ptr [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ]
+ %ip_c1.0119 = phi ptr [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ]
+ %ip_c2.0118 = phi ptr [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ]
%i_row_loop.1117 = phi i32 [ %inc, %for.body24 ], [ %i_row_loop.0128, %for.body24.preheader ]
- %ip_c3.0116 = phi i8* [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ]
+ %ip_c3.0116 = phi ptr [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ]
%acc_3.0115 = phi i32 [ %24, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_2.0114 = phi i32 [ %23, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_1.0113 = phi i32 [ %22, %for.body24 ], [ %0, %for.body24.preheader ]
%acc_0.0112 = phi i32 [ %21, %for.body24 ], [ %0, %for.body24.preheader ]
%1 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %row_len_tmp.0122)
%sub = add nsw i32 %row_len_tmp.0122, -8
- %2 = bitcast i8* %ip_r0.0121 to <8 x i8>*
- %3 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
+ %2 = bitcast ptr %ip_r0.0121 to ptr
+ %3 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %2, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
%4 = sext <8 x i8> %3 to <8 x i16>
- %add.ptr26 = getelementptr inbounds i8, i8* %ip_r0.0121, i32 8
- %5 = bitcast i8* %ip_c0.0120 to <8 x i8>*
- %6 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %5, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
+ %add.ptr26 = getelementptr inbounds i8, ptr %ip_r0.0121, i32 8
+ %5 = bitcast ptr %ip_c0.0120 to ptr
+ %6 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %5, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
%7 = sext <8 x i8> %6 to <8 x i16>
- %add.ptr27 = getelementptr inbounds i8, i8* %ip_c0.0120, i32 8
+ %add.ptr27 = getelementptr inbounds i8, ptr %ip_c0.0120, i32 8
%8 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %7, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef)
- %9 = bitcast i8* %ip_c1.0119 to <8 x i8>*
- %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %9, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
+ %9 = bitcast ptr %ip_c1.0119 to ptr
+ %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %9, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
%11 = sext <8 x i8> %10 to <8 x i16>
- %add.ptr28 = getelementptr inbounds i8, i8* %ip_c1.0119, i32 8
+ %add.ptr28 = getelementptr inbounds i8, ptr %ip_c1.0119, i32 8
%12 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %11, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef)
- %13 = bitcast i8* %ip_c2.0118 to <8 x i8>*
- %14 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %13, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
+ %13 = bitcast ptr %ip_c2.0118 to ptr
+ %14 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %13, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
%15 = sext <8 x i8> %14 to <8 x i16>
- %add.ptr29 = getelementptr inbounds i8, i8* %ip_c2.0118, i32 8
+ %add.ptr29 = getelementptr inbounds i8, ptr %ip_c2.0118, i32 8
%16 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %15, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef)
- %17 = bitcast i8* %ip_c3.0116 to <8 x i8>*
- %18 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %17, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
+ %17 = bitcast ptr %ip_c3.0116 to ptr
+ %18 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %17, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
%19 = sext <8 x i8> %18 to <8 x i16>
- %add.ptr30 = getelementptr inbounds i8, i8* %ip_c3.0116, i32 8
+ %add.ptr30 = getelementptr inbounds i8, ptr %ip_c3.0116, i32 8
%20 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %19, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef)
%21 = tail call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 %acc_0.0112, <8 x i16> %4, <8 x i16> %8, <8 x i1> %1)
%22 = tail call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 %acc_1.0113, <8 x i16> %4, <8 x i16> %12, <8 x i1> %1)
@@ -1046,11 +1046,11 @@ for.body24: ; preds = %for.body24, %for.bo
br i1 %exitcond, label %for.cond.cleanup23, label %for.body24
if.end: ; preds = %for.cond.cleanup23, %for.cond.preheader, %entry
- ret i8* %out
+ ret ptr %out
}
-%struct.arm_cfft_instance_f32 = type { i16, float*, i16*, i16, i32*, i32*, i32*, float*, float*, float* }
-define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cfft_instance_f32Pfjf(%struct.arm_cfft_instance_f32* nocapture readonly %0, float* %1, i32 %2, float %3) {
+%struct.arm_cfft_instance_f32 = type { i16, ptr, ptr, i16, ptr, ptr, ptr, ptr, ptr, ptr }
+define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cfft_instance_f32Pfjf(ptr nocapture readonly %0, ptr %1, i32 %2, float %3) {
; CHECK-LABEL: _Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cfft_instance_f32Pfjf:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -1210,12 +1210,12 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
6: ; preds = %4
%7 = lshr i32 %2, 2
- %8 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 7
- %9 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 4
- %10 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 8
- %11 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 5
- %12 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 9
- %13 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 6
+ %8 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 7
+ %9 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 4
+ %10 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 8
+ %11 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 5
+ %12 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 9
+ %13 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 6
br label %14
14: ; preds = %6, %40
@@ -1235,7 +1235,7 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
br i1 %25, label %40, label %45
26: ; preds = %40, %4
- %27 = ptrtoint float* %1 to i32
+ %27 = ptrtoint ptr %1 to i32
%28 = insertelement <4 x i32> undef, i32 %27, i32 0
%29 = shufflevector <4 x i32> %28, <4 x i32> undef, <4 x i32> zeroinitializer
%30 = add <4 x i32> %29, <i32 -64, i32 -60, i32 -32, i32 -28>
@@ -1261,76 +1261,76 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
45: ; preds = %21, %113
%46 = phi i32 [ %114, %113 ], [ 0, %21 ]
- %47 = load float*, float** %8, align 4
- %48 = load i32*, i32** %9, align 4
- %49 = getelementptr inbounds i32, i32* %48, i32 %18
- %50 = load i32, i32* %49, align 4
- %51 = getelementptr inbounds float, float* %47, i32 %50
- %52 = load float*, float** %10, align 4
- %53 = load i32*, i32** %11, align 4
- %54 = getelementptr inbounds i32, i32* %53, i32 %18
- %55 = load i32, i32* %54, align 4
- %56 = getelementptr inbounds float, float* %52, i32 %55
- %57 = load float*, float** %12, align 4
- %58 = load i32*, i32** %13, align 4
- %59 = getelementptr inbounds i32, i32* %58, i32 %18
- %60 = load i32, i32* %59, align 4
- %61 = getelementptr inbounds float, float* %57, i32 %60
+ %47 = load ptr, ptr %8, align 4
+ %48 = load ptr, ptr %9, align 4
+ %49 = getelementptr inbounds i32, ptr %48, i32 %18
+ %50 = load i32, ptr %49, align 4
+ %51 = getelementptr inbounds float, ptr %47, i32 %50
+ %52 = load ptr, ptr %10, align 4
+ %53 = load ptr, ptr %11, align 4
+ %54 = getelementptr inbounds i32, ptr %53, i32 %18
+ %55 = load i32, ptr %54, align 4
+ %56 = getelementptr inbounds float, ptr %52, i32 %55
+ %57 = load ptr, ptr %12, align 4
+ %58 = load ptr, ptr %13, align 4
+ %59 = getelementptr inbounds i32, ptr %58, i32 %18
+ %60 = load i32, ptr %59, align 4
+ %61 = getelementptr inbounds float, ptr %57, i32 %60
%62 = mul i32 %22, %46
- %63 = getelementptr inbounds float, float* %1, i32 %62
- %64 = getelementptr inbounds float, float* %63, i32 %23
- %65 = getelementptr inbounds float, float* %64, i32 %23
- %66 = getelementptr inbounds float, float* %65, i32 %23
+ %63 = getelementptr inbounds float, ptr %1, i32 %62
+ %64 = getelementptr inbounds float, ptr %63, i32 %23
+ %65 = getelementptr inbounds float, ptr %64, i32 %23
+ %66 = getelementptr inbounds float, ptr %65, i32 %23
br label %67
67: ; preds = %45, %67
- %68 = phi float* [ %63, %45 ], [ %89, %67 ]
- %69 = phi float* [ %65, %45 ], [ %103, %67 ]
- %70 = phi float* [ %66, %45 ], [ %110, %67 ]
- %71 = phi float* [ %64, %45 ], [ %96, %67 ]
- %72 = phi float* [ %61, %45 ], [ %107, %67 ]
- %73 = phi float* [ %56, %45 ], [ %93, %67 ]
- %74 = phi float* [ %51, %45 ], [ %100, %67 ]
+ %68 = phi ptr [ %63, %45 ], [ %89, %67 ]
+ %69 = phi ptr [ %65, %45 ], [ %103, %67 ]
+ %70 = phi ptr [ %66, %45 ], [ %110, %67 ]
+ %71 = phi ptr [ %64, %45 ], [ %96, %67 ]
+ %72 = phi ptr [ %61, %45 ], [ %107, %67 ]
+ %73 = phi ptr [ %56, %45 ], [ %93, %67 ]
+ %74 = phi ptr [ %51, %45 ], [ %100, %67 ]
%75 = phi i32 [ %24, %45 ], [ %111, %67 ]
- %76 = bitcast float* %69 to <4 x float>*
- %77 = bitcast float* %68 to <4 x float>*
- %78 = load <4 x float>, <4 x float>* %76, align 4
- %79 = load <4 x float>, <4 x float>* %77, align 4
- %80 = bitcast float* %71 to <4 x float>*
- %81 = load <4 x float>, <4 x float>* %80, align 4
- %82 = bitcast float* %70 to <4 x float>*
- %83 = load <4 x float>, <4 x float>* %82, align 4
+ %76 = bitcast ptr %69 to ptr
+ %77 = bitcast ptr %68 to ptr
+ %78 = load <4 x float>, ptr %76, align 4
+ %79 = load <4 x float>, ptr %77, align 4
+ %80 = bitcast ptr %71 to ptr
+ %81 = load <4 x float>, ptr %80, align 4
+ %82 = bitcast ptr %70 to ptr
+ %83 = load <4 x float>, ptr %82, align 4
%84 = fadd <4 x float> %78, %79
%85 = fsub <4 x float> %79, %78
%86 = fadd <4 x float> %81, %83
%87 = fsub <4 x float> %81, %83
%88 = fadd <4 x float> %84, %86
- store <4 x float> %88, <4 x float>* %77, align 4
- %89 = getelementptr inbounds float, float* %68, i32 4
+ store <4 x float> %88, ptr %77, align 4
+ %89 = getelementptr inbounds float, ptr %68, i32 4
%90 = fsub <4 x float> %84, %86
- %91 = bitcast float* %73 to <4 x float>*
- %92 = load <4 x float>, <4 x float>* %91, align 4
- %93 = getelementptr inbounds float, float* %73, i32 4
+ %91 = bitcast ptr %73 to ptr
+ %92 = load <4 x float>, ptr %91, align 4
+ %93 = getelementptr inbounds float, ptr %73, i32 4
%94 = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %92, <4 x float> %90)
%95 = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %94, <4 x float> %92, <4 x float> %90)
- store <4 x float> %95, <4 x float>* %80, align 4
- %96 = getelementptr inbounds float, float* %71, i32 4
+ store <4 x float> %95, ptr %80, align 4
+ %96 = getelementptr inbounds float, ptr %71, i32 4
%97 = tail call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 0, <4 x float> %85, <4 x float> %87)
- %98 = bitcast float* %74 to <4 x float>*
- %99 = load <4 x float>, <4 x float>* %98, align 4
- %100 = getelementptr inbounds float, float* %74, i32 4
+ %98 = bitcast ptr %74 to ptr
+ %99 = load <4 x float>, ptr %98, align 4
+ %100 = getelementptr inbounds float, ptr %74, i32 4
%101 = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %99, <4 x float> %97)
%102 = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %101, <4 x float> %99, <4 x float> %97)
- store <4 x float> %102, <4 x float>* %76, align 4
- %103 = getelementptr inbounds float, float* %69, i32 4
+ store <4 x float> %102, ptr %76, align 4
+ %103 = getelementptr inbounds float, ptr %69, i32 4
%104 = tail call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 1, <4 x float> %85, <4 x float> %87)
- %105 = bitcast float* %72 to <4 x float>*
- %106 = load <4 x float>, <4 x float>* %105, align 4
- %107 = getelementptr inbounds float, float* %72, i32 4
+ %105 = bitcast ptr %72 to ptr
+ %106 = load <4 x float>, ptr %105, align 4
+ %107 = getelementptr inbounds float, ptr %72, i32 4
%108 = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %106, <4 x float> %104)
%109 = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %108, <4 x float> %106, <4 x float> %104)
- store <4 x float> %109, <4 x float>* %82, align 4
- %110 = getelementptr inbounds float, float* %70, i32 4
+ store <4 x float> %109, ptr %82, align 4
+ %110 = getelementptr inbounds float, ptr %70, i32 4
%111 = add nsw i32 %75, -1
%112 = icmp eq i32 %111, 0
br i1 %112, label %113, label %67
@@ -1378,10 +1378,10 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
-declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
-declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
-declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>)
+declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32 immarg, <4 x i1>)
declare i32 @llvm.vector.reduce.add.v16i8(<16 x i32> %ext4)
declare i32 @llvm.arm.mve.vmldava.v8i16(i32, i32, i32, i32, <8 x i16>, <8 x i16>)
declare i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32, i32, i32, i32, <16 x i8>, <16 x i8>, <16 x i1>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
index 35e6dd4a2d51d..bf6468baac22b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -verify-machineinstrs -o - | FileCheck %s
-define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture %pResult, i32* nocapture %pIndex) {
+define void @arm_min_helium_f32(ptr %pSrc, i32 %blockSize, ptr nocapture %pResult, ptr nocapture %pIndex) {
; CHECK-LABEL: arm_min_helium_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r6, r7, lr}
@@ -48,17 +48,17 @@ do.body: ; preds = %do.body, %entry
%indexVec.0 = phi <4 x i32> [ %1, %entry ], [ %11, %do.body ]
%2 = phi <4 x float> [ zeroinitializer, %entry ], [ %10, %do.body ]
%blkCnt.0 = phi i32 [ %blockSize, %entry ], [ %sub, %do.body ]
- %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
+ %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ]
%3 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
- %4 = bitcast float* %pSrc.addr.0 to <4 x float>*
- %5 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %4, i32 4, <4 x i1> %3, <4 x float> zeroinitializer)
+ %4 = bitcast ptr %pSrc.addr.0 to ptr
+ %5 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %4, i32 4, <4 x i1> %3, <4 x float> zeroinitializer)
%6 = fcmp fast ole <4 x float> %5, %curExtremValVec.0
%7 = and <4 x i1> %6, %3
%8 = select fast <4 x i1> %7, <4 x float> %5, <4 x float> %curExtremValVec.0
%9 = bitcast <4 x i32> %indexVec.0 to <4 x float>
%10 = select fast <4 x i1> %7, <4 x float> %9, <4 x float> %2
%11 = add <4 x i32> %indexVec.0, <i32 4, i32 4, i32 4, i32 4>
- %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4
+ %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4
%sub = add nsw i32 %blkCnt.0, -4
%cmp = icmp sgt i32 %blkCnt.0, 4
br i1 %cmp, label %do.body, label %do.end
@@ -73,13 +73,13 @@ do.end: ; preds = %do.body
%.splat2 = shufflevector <4 x i32> %.splatinsert1, <4 x i32> undef, <4 x i32> zeroinitializer
%15 = select <4 x i1> %14, <4 x i32> %12, <4 x i32> %.splat2
%16 = tail call i32 @llvm.arm.mve.minv.v4i32(i32 %blockSize, <4 x i32> %15, i32 1)
- store i32 %16, i32* %pIndex, align 4
- store float %13, float* %pResult, align 4
+ store i32 %16, ptr %pIndex, align 4
+ store float %13, ptr %pResult, align 4
ret void
}
declare { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32, i32) #1
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #2
declare float @llvm.arm.mve.minnmv.f32.v4f32(float, <4 x float>) #1
declare i32 @llvm.arm.mve.minv.v4i32(i32, <4 x i32>, i32) #1
diff --git a/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll b/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll
index f3e8ea8011927..29c4fb902bf36 100644
--- a/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
-define void @vaddq(i32* %x, i32* %y, i32 %n) {
+define void @vaddq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vaddq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -28,23 +28,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = add <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10>
- %4 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vadd(i32* %s1, i32 %c0, i32 %N) {
+define void @vadd(ptr %s1, i32 %c0, i32 %N) {
; CHECK-LABEL: vadd:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -72,14 +72,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%3 = tail call <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -88,7 +88,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vsubq(i32* %x, i32* %y, i32 %n) {
+define void @vsubq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vsubq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -115,23 +115,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = sub <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10>
- %4 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vsub(i32* %s1, i32 %N) {
+define void @vsub(ptr %s1, i32 %N) {
; CHECK-LABEL: vsub:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -158,14 +158,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%3 = tail call <4 x i32> @llvm.arm.mve.sub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i1> %0, <4 x i32> %2)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -174,7 +174,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vmulq(i32* %x, i32* %y, i32 %n) {
+define void @vmulq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vmulq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -201,23 +201,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = mul <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10>
- %4 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vmul(i32* %s1, i32 %N) {
+define void @vmul(ptr %s1, i32 %N) {
; CHECK-LABEL: vmul:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -244,14 +244,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%3 = tail call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i1> %0, <4 x i32> %2)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -260,7 +260,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vqaddq(i32* %x, i32* %y, i32 %n) {
+define void @vqaddq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vqaddq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -287,23 +287,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = tail call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>)
- %4 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vqaddqu(i32* %x, i32* %y, i32 %n) {
+define void @vqaddqu(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vqaddqu:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -330,23 +330,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = tail call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>)
- %4 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vqadd(i32* %s1, i32 %N) {
+define void @vqadd(ptr %s1, i32 %N) {
; CHECK-LABEL: vqadd:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -373,14 +373,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%3 = tail call <4 x i32> @llvm.arm.mve.qadd.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0, <4 x i1> %0, <4 x i32> %2)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -389,7 +389,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vqsubq(i32* %x, i32* %y, i32 %n) {
+define void @vqsubq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vqsubq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -416,23 +416,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>)
- %4 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vqsubqu(i32* %x, i32* %y, i32 %n) {
+define void @vqsubqu(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vqsubqu:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -459,23 +459,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = tail call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>)
- %4 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vqsub(i32* %s1, i32 %N) {
+define void @vqsub(ptr %s1, i32 %N) {
; CHECK-LABEL: vqsub:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -502,14 +502,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%3 = tail call <4 x i32> @llvm.arm.mve.qsub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0, <4 x i1> %0, <4 x i32> %2)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -518,7 +518,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vhaddq(i32* %x, i32* %y, i32 %n) {
+define void @vhaddq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vhaddq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -545,23 +545,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = tail call <4 x i32> @llvm.arm.mve.vhadd.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0)
- %4 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vhadd(i32* %s1, i32 %N) {
+define void @vhadd(ptr %s1, i32 %N) {
; CHECK-LABEL: vhadd:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -588,14 +588,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%3 = tail call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0, <4 x i1> %0, <4 x i32> %2)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -604,7 +604,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vhsubq(i32* %x, i32* %y, i32 %n) {
+define void @vhsubq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vhsubq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -631,23 +631,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = tail call <4 x i32> @llvm.arm.mve.vhsub.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0)
- %4 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vhsub(i32* %s1, i32 %N) {
+define void @vhsub(ptr %s1, i32 %N) {
; CHECK-LABEL: vhsub:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -674,14 +674,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%3 = tail call <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0, <4 x i1> %0, <4 x i32> %2)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -690,7 +690,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vqdmullbq(i32* %x, i32* %y, i32 %n) {
+define void @vqdmullbq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vqdmullbq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -717,25 +717,25 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = tail call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0)
%4 = bitcast <2 x i64> %3 to <4 x i32>
- %5 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %4, <4 x i32>* %5, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %5 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %4, ptr %5, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vqdmull(i32* %s1, i32 %N) {
+define void @vqdmull(ptr %s1, i32 %N) {
; CHECK-LABEL: vqdmull:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -762,17 +762,17 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast i32* %s1.addr.013 to <4 x i16>*
- %2 = tail call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %1, i32 2, <4 x i1> %0, <4 x i16> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %1, i32 2, <4 x i1> %0, <4 x i16> zeroinitializer)
%3 = sext <4 x i16> %2 to <4 x i32>
%4 = bitcast <4 x i32> %3 to <8 x i16>
%5 = tail call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %4, <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>, i32 0, <4 x i1> %0, <4 x i32> %3)
- %6 = bitcast i32* %s1.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %5, <4 x i32>* %6, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
+ %6 = bitcast ptr %s1.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %5, ptr %6, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -781,7 +781,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vqdmulhq(i32* %x, i32* %y, i32 %n) {
+define void @vqdmulhq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vqdmulhq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -808,23 +808,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = tail call <4 x i32> @llvm.arm.mve.vqdmulh.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>)
- %4 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vqdmulh(i32* %s1, i32 %N) {
+define void @vqdmulh(ptr %s1, i32 %N) {
; CHECK-LABEL: vqdmulh:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -851,14 +851,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%3 = tail call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i1> %0, <4 x i32> %2)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -867,7 +867,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vqrdmulhq(i32* %x, i32* %y, i32 %n) {
+define void @vqrdmulhq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vqrdmulhq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -894,23 +894,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast i32* %x.addr.014 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4
%3 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>)
- %4 = bitcast i32* %y.addr.013 to <4 x i32>*
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vqrdmulh(i32* %s1, i32 %N) {
+define void @vqrdmulh(ptr %s1, i32 %N) {
; CHECK-LABEL: vqrdmulh:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -937,14 +937,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%3 = tail call <4 x i32> @llvm.arm.mve.qrdmulh.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i1> %0, <4 x i32> %2)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -953,7 +953,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vmlaq(i32* %x, i32* %y, i32 %n) {
+define void @vmlaq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vmlaq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -981,25 +981,25 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.017 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.016 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.017 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.016 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.015 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.015)
- %1 = bitcast i32* %x.addr.017 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.017, i32 4
- %3 = bitcast i32* %y.addr.016 to <4 x i32>*
- %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %x.addr.017 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.017, i32 4
+ %3 = bitcast ptr %y.addr.016 to ptr
+ %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%5 = mul <4 x i32> %4, <i32 10, i32 10, i32 10, i32 10>
%6 = add <4 x i32> %5, %2
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %6, <4 x i32>* %3, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.016, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %6, ptr %3, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.016, i32 4
%sub = add nsw i32 %i.015, -4
%cmp = icmp sgt i32 %i.015, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vmlaqp(i32* %x, i32* %y, i32 %n) {
+define void @vmlaqp(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vmlaqp:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1027,24 +1027,24 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.018 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.017 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.018 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.017 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.016 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.016)
- %1 = bitcast i32* %x.addr.018 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.018, i32 4
- %3 = bitcast i32* %y.addr.017 to <4 x i32>*
- %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %x.addr.018 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.018, i32 4
+ %3 = bitcast ptr %y.addr.017 to ptr
+ %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%5 = tail call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %4, i32 10, <4 x i1> %0)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %5, <4 x i32>* %3, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.017, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %5, ptr %3, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.017, i32 4
%sub = add nsw i32 %i.016, -4
%cmp = icmp sgt i32 %i.016, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vmlasq(i32* %x, i32* %y, i32 %n) {
+define void @vmlasq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vmlasq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1072,25 +1072,25 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.017 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.016 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.017 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.016 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.015 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.015)
- %1 = bitcast i32* %x.addr.017 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.017, i32 4
- %3 = bitcast i32* %y.addr.016 to <4 x i32>*
- %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %x.addr.017 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.017, i32 4
+ %3 = bitcast ptr %y.addr.016 to ptr
+ %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%5 = mul <4 x i32> %4, %2
%6 = add <4 x i32> %5, <i32 10, i32 10, i32 10, i32 10>
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %6, <4 x i32>* %3, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.016, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %6, ptr %3, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.016, i32 4
%sub = add nsw i32 %i.015, -4
%cmp = icmp sgt i32 %i.015, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vmlasqp(i32* %x, i32* %y, i32 %n) {
+define void @vmlasqp(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vmlasqp:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1118,24 +1118,24 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.018 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.017 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.018 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.017 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.016 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.016)
- %1 = bitcast i32* %x.addr.018 to <4 x i32>*
- %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.018, i32 4
- %3 = bitcast i32* %y.addr.017 to <4 x i32>*
- %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %1 = bitcast ptr %x.addr.018 to ptr
+ %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.018, i32 4
+ %3 = bitcast ptr %y.addr.017 to ptr
+ %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
%5 = tail call <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %4, i32 10, <4 x i1> %0)
- tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %5, <4 x i32>* %3, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.017, i32 4
+ tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %5, ptr %3, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.017, i32 4
%sub = add nsw i32 %i.016, -4
%cmp = icmp sgt i32 %i.016, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vaddqf(float* %x, float* %y, i32 %n) {
+define void @vaddqf(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vaddqf:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1162,23 +1162,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast float* %x.addr.014 to <4 x float>*
- %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
- %add.ptr = getelementptr inbounds float, float* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %add.ptr = getelementptr inbounds float, ptr %x.addr.014, i32 4
%3 = fadd fast <4 x float> %2, <float 10.0, float 10.0, float 10.0, float 10.0>
- %4 = bitcast float* %y.addr.013 to <4 x float>*
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds float, float* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds float, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vaddf(float* %s1, i32 %N) {
+define void @vaddf(ptr %s1, i32 %N) {
; CHECK-LABEL: vaddf:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1206,14 +1206,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast float* %s1.addr.013 to <4 x float>*
- %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
%3 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x i1> %0, <4 x float> %2)
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds float, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -1222,7 +1222,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vsubqf(float* %x, float* %y, i32 %n) {
+define void @vsubqf(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vsubqf:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1249,23 +1249,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast float* %x.addr.014 to <4 x float>*
- %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
- %add.ptr = getelementptr inbounds float, float* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %add.ptr = getelementptr inbounds float, ptr %x.addr.014, i32 4
%3 = fsub fast <4 x float> %2, <float 10.0, float 10.0, float 10.0, float 10.0>
- %4 = bitcast float* %y.addr.013 to <4 x float>*
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds float, float* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds float, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vsubf(float* %s1, i32 %N) {
+define void @vsubf(ptr %s1, i32 %N) {
; CHECK-LABEL: vsubf:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1293,14 +1293,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast float* %s1.addr.013 to <4 x float>*
- %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
%3 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x i1> %0, <4 x float> %2)
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds float, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -1309,7 +1309,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vmulqf(float* %x, float* %y, i32 %n) {
+define void @vmulqf(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vmulqf:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1336,23 +1336,23 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.014 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.013 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012)
- %1 = bitcast float* %x.addr.014 to <4 x float>*
- %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
- %add.ptr = getelementptr inbounds float, float* %x.addr.014, i32 4
+ %1 = bitcast ptr %x.addr.014 to ptr
+ %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %add.ptr = getelementptr inbounds float, ptr %x.addr.014, i32 4
%3 = fmul fast <4 x float> %2, <float 10.0, float 10.0, float 10.0, float 10.0>
- %4 = bitcast float* %y.addr.013 to <4 x float>*
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %4, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds float, float* %y.addr.013, i32 4
+ %4 = bitcast ptr %y.addr.013 to ptr
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %4, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds float, ptr %y.addr.013, i32 4
%sub = add nsw i32 %i.012, -4
%cmp = icmp sgt i32 %i.012, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vmulf(float* %s1, i32 %N) {
+define void @vmulf(ptr %s1, i32 %N) {
; CHECK-LABEL: vmulf:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1380,14 +1380,14 @@ while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
- %1 = bitcast float* %s1.addr.013 to <4 x float>*
- %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %1 = bitcast ptr %s1.addr.013 to ptr
+ %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
%3 = tail call fast <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x i1> %0, <4 x float> %2)
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0)
- %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %1, i32 4, <4 x i1> %0)
+ %add.ptr = getelementptr inbounds float, ptr %s1.addr.013, i32 4
%sub = add nsw i32 %N.addr.012, -4
%cmp = icmp sgt i32 %N.addr.012, 4
br i1 %cmp, label %while.body, label %while.end
@@ -1396,7 +1396,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vfmaq(float* %x, float* %y, i32 %n) {
+define void @vfmaq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vfmaq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1424,24 +1424,24 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.017 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.016 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.017 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.016 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.015 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.015)
- %1 = bitcast float* %x.addr.017 to <4 x float>*
- %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
- %add.ptr = getelementptr inbounds float, float* %x.addr.017, i32 4
- %3 = bitcast float* %y.addr.016 to <4 x float>*
- %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %3, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %1 = bitcast ptr %x.addr.017 to ptr
+ %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %add.ptr = getelementptr inbounds float, ptr %x.addr.017, i32 4
+ %3 = bitcast ptr %y.addr.016 to ptr
+ %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
%5 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %4, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x float> %2)
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %3, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds float, float* %y.addr.016, i32 4
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %5, ptr %3, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds float, ptr %y.addr.016, i32 4
%sub = add nsw i32 %i.015, -4
%cmp = icmp sgt i32 %i.015, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vfma(float* %s1, float* %s2, i32 %N) {
+define void @vfma(ptr %s1, ptr %s2, i32 %N) {
; CHECK-LABEL: vfma:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1466,19 +1466,19 @@ entry:
br i1 %cmp12, label %while.body.lr.ph, label %while.end
while.body.lr.ph: ; preds = %entry
- %0 = bitcast float* %s2 to <4 x float>*
+ %0 = bitcast ptr %s2 to ptr
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.014 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.014 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.013 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%1 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.013)
- %2 = bitcast float* %s1.addr.014 to <4 x float>*
- %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
- %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
+ %2 = bitcast ptr %s1.addr.014 to ptr
+ %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
+ %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
%5 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %4, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x float> %3, <4 x i1> %1)
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %2, i32 4, <4 x i1> %1)
- %add.ptr = getelementptr inbounds float, float* %s1.addr.014, i32 4
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %5, ptr %2, i32 4, <4 x i1> %1)
+ %add.ptr = getelementptr inbounds float, ptr %s1.addr.014, i32 4
%sub = add nsw i32 %N.addr.013, -4
%cmp = icmp sgt i32 %N.addr.013, 4
br i1 %cmp, label %while.body, label %while.end
@@ -1487,7 +1487,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @vfmasq(float* %x, float* %y, i32 %n) {
+define void @vfmasq(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: vfmasq:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1516,24 +1516,24 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %x.addr.017 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.016 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ]
+ %x.addr.017 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.016 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%i.015 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.015)
- %1 = bitcast float* %x.addr.017 to <4 x float>*
- %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
- %add.ptr = getelementptr inbounds float, float* %x.addr.017, i32 4
- %3 = bitcast float* %y.addr.016 to <4 x float>*
- %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %3, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %1 = bitcast ptr %x.addr.017 to ptr
+ %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
+ %add.ptr = getelementptr inbounds float, ptr %x.addr.017, i32 4
+ %3 = bitcast ptr %y.addr.016 to ptr
+ %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
%5 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %2, <4 x float> %4, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>)
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %3, i32 4, <4 x i1> %0)
- %add.ptr1 = getelementptr inbounds float, float* %y.addr.016, i32 4
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %5, ptr %3, i32 4, <4 x i1> %0)
+ %add.ptr1 = getelementptr inbounds float, ptr %y.addr.016, i32 4
%sub = add nsw i32 %i.015, -4
%cmp = icmp sgt i32 %i.015, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @vfmas(float* %s1, float* %s2, i32 %N) {
+define void @vfmas(ptr %s1, ptr %s2, i32 %N) {
; CHECK-LABEL: vfmas:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1559,19 +1559,19 @@ entry:
br i1 %cmp12, label %while.body.lr.ph, label %while.end
while.body.lr.ph: ; preds = %entry
- %0 = bitcast float* %s2 to <4 x float>*
+ %0 = bitcast ptr %s2 to ptr
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
- %s1.addr.014 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
+ %s1.addr.014 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
%N.addr.013 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
%1 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.013)
- %2 = bitcast float* %s1.addr.014 to <4 x float>*
- %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
- %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
+ %2 = bitcast ptr %s1.addr.014 to ptr
+ %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
+ %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
%5 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %3, <4 x float> %4, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x i1> %1)
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %2, i32 4, <4 x i1> %1)
- %add.ptr = getelementptr inbounds float, float* %s1.addr.014, i32 4
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %5, ptr %2, i32 4, <4 x i1> %1)
+ %add.ptr = getelementptr inbounds float, ptr %s1.addr.014, i32 4
%sub = add nsw i32 %N.addr.013, -4
%cmp = icmp sgt i32 %N.addr.013, 4
br i1 %cmp, label %while.body, label %while.end
@@ -1580,7 +1580,7 @@ while.end: ; preds = %while.body, %entry
ret void
}
-define void @rgbconvert(i32* noalias %pwSourceBase, i16 signext %iSourceStride, i16* noalias %phwTargetBase, i16 signext %iTargetStride, i16 %iHeight, i16 %iWidth) {
+define void @rgbconvert(ptr noalias %pwSourceBase, i16 signext %iSourceStride, ptr noalias %phwTargetBase, i16 signext %iTargetStride, i16 %iHeight, i16 %iWidth) {
; CHECK-LABEL: rgbconvert:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -1655,18 +1655,18 @@ for.cond.cleanup: ; preds = %do.end, %entry
ret void
for.body: ; preds = %for.body.lr.ph, %do.end
- %pwSourceBase.addr.040 = phi i32* [ %pwSourceBase, %for.body.lr.ph ], [ %add.ptr10, %do.end ]
- %phwTargetBase.addr.039 = phi i16* [ %phwTargetBase, %for.body.lr.ph ], [ %add.ptr12, %do.end ]
+ %pwSourceBase.addr.040 = phi ptr [ %pwSourceBase, %for.body.lr.ph ], [ %add.ptr10, %do.end ]
+ %phwTargetBase.addr.039 = phi ptr [ %phwTargetBase, %for.body.lr.ph ], [ %add.ptr12, %do.end ]
%y.038 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %do.end ]
br label %do.body
do.body: ; preds = %do.body, %for.body
- %pTarget.0 = phi i16* [ %phwTargetBase.addr.039, %for.body ], [ %add.ptr6, %do.body ]
- %pSource.0 = phi i32* [ %pwSourceBase.addr.040, %for.body ], [ %add.ptr, %do.body ]
+ %pTarget.0 = phi ptr [ %phwTargetBase.addr.039, %for.body ], [ %add.ptr6, %do.body ]
+ %pSource.0 = phi ptr [ %pwSourceBase.addr.040, %for.body ], [ %add.ptr, %do.body ]
%blkCnt.0 = phi i32 [ %conv2, %for.body ], [ %sub, %do.body ]
%l2 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
- %l3 = bitcast i32* %pSource.0 to <4 x i32>*
- %l4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %l3, i32 4, <4 x i1> %l2, <4 x i32> zeroinitializer)
+ %l3 = bitcast ptr %pSource.0 to ptr
+ %l4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %l3, i32 4, <4 x i1> %l2, <4 x i32> zeroinitializer)
%l5 = tail call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> %l4, <4 x i32> <i32 268435456, i32 268435456, i32 268435456, i32 268435456>, <4 x i1> %l2, <4 x i32> undef)
%and = and <4 x i32> %l5, <i32 31, i32 31, i32 31, i32 31>
%l6 = tail call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> %l4, <4 x i32> <i32 67108864, i32 67108864, i32 67108864, i32 67108864>, <4 x i1> %l2, <4 x i32> undef)
@@ -1676,29 +1676,29 @@ do.body: ; preds = %do.body, %for.body
%or = or <4 x i32> %and3, %and
%or5 = or <4 x i32> %or, %and4
%l8 = trunc <4 x i32> %or5 to <4 x i16>
- %l9 = bitcast i16* %pTarget.0 to <4 x i16>*
- tail call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %l8, <4 x i16>* %l9, i32 2, <4 x i1> %l2)
- %add.ptr = getelementptr inbounds i32, i32* %pSource.0, i32 4
- %add.ptr6 = getelementptr inbounds i16, i16* %pTarget.0, i32 4
+ %l9 = bitcast ptr %pTarget.0 to ptr
+ tail call void @llvm.masked.store.v4i16.p0(<4 x i16> %l8, ptr %l9, i32 2, <4 x i1> %l2)
+ %add.ptr = getelementptr inbounds i32, ptr %pSource.0, i32 4
+ %add.ptr6 = getelementptr inbounds i16, ptr %pTarget.0, i32 4
%sub = add nsw i32 %blkCnt.0, -4
%cmp7 = icmp sgt i32 %blkCnt.0, 4
br i1 %cmp7, label %do.body, label %do.end
do.end: ; preds = %do.body
- %add.ptr10 = getelementptr inbounds i32, i32* %pwSourceBase.addr.040, i32 %conv9
- %add.ptr12 = getelementptr inbounds i16, i16* %phwTargetBase.addr.039, i32 %conv11
+ %add.ptr10 = getelementptr inbounds i32, ptr %pwSourceBase.addr.040, i32 %conv9
+ %add.ptr12 = getelementptr inbounds i16, ptr %phwTargetBase.addr.039, i32 %conv11
%inc = add nuw nsw i32 %y.038, 1
%exitcond.not = icmp eq i32 %inc, %conv
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
-declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
-declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
-declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
-declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32 immarg, <4 x i1>) #3
+declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>)
+declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
+declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32 immarg, <4 x i1>)
+declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32 immarg, <4 x i1>) #3
declare <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
declare <4 x i32> @llvm.arm.mve.sub.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add-combine.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add-combine.ll
index 8a8b6c5b6ea20..37ef4a9207259 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add-combine.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add-combine.ll
@@ -13,7 +13,7 @@ entry:
%reduce1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg2)
%reduce2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg3)
%add1 = add i32 %reduce1, %reduce2
- store i32 %add1, i32* %ptr, align 4
+ store i32 %add1, ptr %ptr, align 4
%add2 = add i32 %add1, %arg1
ret i32 %add2
}
@@ -30,7 +30,7 @@ entry:
%reduce1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg2)
%reduce2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg3)
%add1 = add i32 %reduce1, %reduce2
- store i32 %add1, i32* %ptr, align 4
+ store i32 %add1, ptr %ptr, align 4
%add2 = add i32 %arg1, %add1
ret i32 %add2
}
@@ -48,7 +48,7 @@ entry:
%reduce1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg3)
%reduce2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg4)
%add1 = add i32 %arg1, %reduce1
- store i32 %add1, i32* %ptr, align 4
+ store i32 %add1, ptr %ptr, align 4
%add2 = add i32 %arg2, %reduce2
%add3 = add i32 %add1, %add2
ret i32 %add3
@@ -66,13 +66,13 @@ define arm_aapcs_vfpcc i32 @test4(ptr %ptr, i32 %arg1, ptr %arg2) {
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: bx lr
entry:
- %load1 = load <4 x i32>, <4 x i32>* %arg2, align 4
- %gep = getelementptr inbounds i32, i32* %arg2, i32 1
- %load2 = load <4 x i32>, <4 x i32>* %gep, align 4
+ %load1 = load <4 x i32>, ptr %arg2, align 4
+ %gep = getelementptr inbounds i32, ptr %arg2, i32 1
+ %load2 = load <4 x i32>, ptr %gep, align 4
%reduce1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %load1)
%reduce2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %load2)
%add1 = add i32 %arg1, %reduce1
- store i32 %add1, i32* %ptr, align 4
+ store i32 %add1, ptr %ptr, align 4
%add2 = add i32 %add1, %reduce2
ret i32 %add2
}
@@ -89,13 +89,13 @@ define arm_aapcs_vfpcc i32 @test5(ptr %ptr, i32 %arg1, ptr %arg2) {
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: bx lr
entry:
- %load1 = load <4 x i32>, <4 x i32>* %arg2, align 4
- %gep = getelementptr inbounds i32, i32* %arg2, i32 1
- %load2 = load <4 x i32>, <4 x i32>* %gep, align 4
+ %load1 = load <4 x i32>, ptr %arg2, align 4
+ %gep = getelementptr inbounds i32, ptr %arg2, i32 1
+ %load2 = load <4 x i32>, ptr %gep, align 4
%reduce1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %load1)
%reduce2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %load2)
%add1 = add i32 %arg1, %reduce2
- store i32 %add1, i32* %ptr, align 4
+ store i32 %add1, ptr %ptr, align 4
%add2 = add i32 %add1, %reduce1
ret i32 %add2
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll
index 6ab1a9344bb23..c7661a1f430c6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -tail-predication=enabled -verify-machineinstrs %s -o - | FileCheck %s
-define i32 @add_i32(i32* nocapture readonly %x, i32 %n) {
+define i32 @add_i32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: add_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -60,9 +60,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load)
%3 = add i32 %2, %vec.phi
%index.next = add i32 %index, 4
@@ -81,8 +81,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %5 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %5 = load i32, ptr %arrayidx, align 4
%add = add nsw i32 %5, %r.07
%inc = add nuw nsw i32 %i.08, 1
%exitcond = icmp eq i32 %inc, %n
@@ -93,7 +93,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) {
+define i32 @mul_i32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: mul_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
@@ -152,9 +152,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, %vector.ph ], [ %2, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%2 = mul <4 x i32> %wide.load, %vec.phi
%index.next = add i32 %index, 4
%3 = icmp eq i32 %index.next, %n.vec
@@ -173,8 +173,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %5 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %5 = load i32, ptr %arrayidx, align 4
%add = mul nsw i32 %5, %r.07
%inc = add nuw nsw i32 %i.08, 1
%exitcond = icmp eq i32 %inc, %n
@@ -185,7 +185,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define i32 @and_i32(i32* nocapture readonly %x, i32 %n) {
+define i32 @and_i32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: and_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
@@ -249,9 +249,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, %vector.ph ], [ %2, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%2 = and <4 x i32> %wide.load, %vec.phi
%index.next = add i32 %index, 4
%3 = icmp eq i32 %index.next, %n.vec
@@ -270,8 +270,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %5 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %5 = load i32, ptr %arrayidx, align 4
%add = and i32 %5, %r.07
%inc = add nuw nsw i32 %i.08, 1
%exitcond = icmp eq i32 %inc, %n
@@ -282,7 +282,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define i32 @or_i32(i32* nocapture readonly %x, i32 %n) {
+define i32 @or_i32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: or_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
@@ -346,9 +346,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %2, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%2 = or <4 x i32> %wide.load, %vec.phi
%index.next = add i32 %index, 4
%3 = icmp eq i32 %index.next, %n.vec
@@ -367,8 +367,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %5 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %5 = load i32, ptr %arrayidx, align 4
%add = or i32 %5, %r.07
%inc = add nuw nsw i32 %i.08, 1
%exitcond = icmp eq i32 %inc, %n
@@ -379,7 +379,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) {
+define i32 @xor_i32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: xor_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
@@ -443,9 +443,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %2, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%2 = xor <4 x i32> %wide.load, %vec.phi
%index.next = add i32 %index, 4
%3 = icmp eq i32 %index.next, %n.vec
@@ -464,8 +464,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %5 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %5 = load i32, ptr %arrayidx, align 4
%add = xor i32 %5, %r.07
%inc = add nuw nsw i32 %i.08, 1
%exitcond = icmp eq i32 %inc, %n
@@ -476,7 +476,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define float @fadd_f32(float* nocapture readonly %x, i32 %n) {
+define float @fadd_f32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: fadd_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -542,9 +542,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x float> [ zeroinitializer, %vector.ph ], [ %2, %vector.body ]
- %0 = getelementptr inbounds float, float* %x, i32 %index
- %1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>, <4 x float>* %1, align 4
+ %0 = getelementptr inbounds float, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x float>, ptr %1, align 4
%2 = fadd fast <4 x float> %wide.load, %vec.phi
%index.next = add i32 %index, 4
%3 = icmp eq i32 %index.next, %n.vec
@@ -563,8 +563,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi float [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds float, float* %x, i32 %i.08
- %5 = load float, float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.08
+ %5 = load float, ptr %arrayidx, align 4
%add = fadd fast float %5, %r.07
%inc = add nuw nsw i32 %i.08, 1
%exitcond = icmp eq i32 %inc, %n
@@ -575,7 +575,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret float %r.0.lcssa
}
-define float @fmul_f32(float* nocapture readonly %x, i32 %n) {
+define float @fmul_f32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: fmul_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -637,9 +637,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x float> [ <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %vector.ph ], [ %2, %vector.body ]
- %0 = getelementptr inbounds float, float* %x, i32 %index
- %1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>, <4 x float>* %1, align 4
+ %0 = getelementptr inbounds float, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x float>, ptr %1, align 4
%2 = fmul fast <4 x float> %wide.load, %vec.phi
%index.next = add i32 %index, 4
%3 = icmp eq i32 %index.next, %n.vec
@@ -658,8 +658,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi float [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds float, float* %x, i32 %i.08
- %5 = load float, float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.08
+ %5 = load float, ptr %arrayidx, align 4
%add = fmul fast float %5, %r.07
%inc = add nuw nsw i32 %i.08, 1
%exitcond = icmp eq i32 %inc, %n
@@ -670,7 +670,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret float %r.0.lcssa
}
-define i32 @smin_i32(i32* nocapture readonly %x, i32 %n) {
+define i32 @smin_i32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: smin_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -732,9 +732,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%2 = icmp slt <4 x i32> %vec.phi, %wide.load
%3 = select <4 x i1> %2, <4 x i32> %vec.phi, <4 x i32> %wide.load
%index.next = add i32 %index, 4
@@ -754,8 +754,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %6 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %6 = load i32, ptr %arrayidx, align 4
%c = icmp slt i32 %r.07, %6
%add = select i1 %c, i32 %r.07, i32 %6
%inc = add nuw nsw i32 %i.08, 1
@@ -767,7 +767,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define i32 @smin_i32_inloop(i32* nocapture readonly %x, i32 %n) {
+define i32 @smin_i32_inloop(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: smin_i32_inloop:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -827,9 +827,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 2147483647, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%l5 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %wide.load)
%2 = icmp slt i32 %vec.phi, %l5
%3 = select i1 %2, i32 %vec.phi, i32 %l5
@@ -850,8 +850,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %6 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %6 = load i32, ptr %arrayidx, align 4
%c = icmp slt i32 %r.07, %6
%add = select i1 %c, i32 %r.07, i32 %6
%inc = add nuw nsw i32 %i.08, 1
@@ -863,7 +863,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define i32 @smax_i32(i32* nocapture readonly %x, i32 %n) {
+define i32 @smax_i32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: smax_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -925,9 +925,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%2 = icmp sgt <4 x i32> %vec.phi, %wide.load
%3 = select <4 x i1> %2, <4 x i32> %vec.phi, <4 x i32> %wide.load
%index.next = add i32 %index, 4
@@ -947,8 +947,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %6 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %6 = load i32, ptr %arrayidx, align 4
%c = icmp sgt i32 %r.07, %6
%add = select i1 %c, i32 %r.07, i32 %6
%inc = add nuw nsw i32 %i.08, 1
@@ -960,7 +960,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define i32 @smax_i32_inloop(i32* nocapture readonly %x, i32 %n) {
+define i32 @smax_i32_inloop(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: smax_i32_inloop:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1020,9 +1020,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ -2147483648, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%l5 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %wide.load)
%2 = icmp sgt i32 %vec.phi, %l5
%3 = select i1 %2, i32 %vec.phi, i32 %l5
@@ -1043,8 +1043,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %6 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %6 = load i32, ptr %arrayidx, align 4
%c = icmp sgt i32 %r.07, %6
%add = select i1 %c, i32 %r.07, i32 %6
%inc = add nuw nsw i32 %i.08, 1
@@ -1056,7 +1056,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define i32 @umin_i32(i32* nocapture readonly %x, i32 %n) {
+define i32 @umin_i32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: umin_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1118,9 +1118,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%2 = icmp ult <4 x i32> %vec.phi, %wide.load
%3 = select <4 x i1> %2, <4 x i32> %vec.phi, <4 x i32> %wide.load
%index.next = add i32 %index, 4
@@ -1140,8 +1140,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %6 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %6 = load i32, ptr %arrayidx, align 4
%c = icmp ult i32 %r.07, %6
%add = select i1 %c, i32 %r.07, i32 %6
%inc = add nuw nsw i32 %i.08, 1
@@ -1153,7 +1153,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define i32 @umin_i32_inloop(i32* nocapture readonly %x, i32 %n) {
+define i32 @umin_i32_inloop(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: umin_i32_inloop:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1213,9 +1213,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ -1, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%l5 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %wide.load)
%2 = icmp ult i32 %vec.phi, %l5
%3 = select i1 %2, i32 %vec.phi, i32 %l5
@@ -1236,8 +1236,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %6 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %6 = load i32, ptr %arrayidx, align 4
%c = icmp ugt i32 %r.07, %6
%add = select i1 %c, i32 %r.07, i32 %6
%inc = add nuw nsw i32 %i.08, 1
@@ -1249,7 +1249,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define i32 @umax_i32(i32* nocapture readonly %x, i32 %n) {
+define i32 @umax_i32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: umax_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1311,9 +1311,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%2 = icmp ugt <4 x i32> %vec.phi, %wide.load
%3 = select <4 x i1> %2, <4 x i32> %vec.phi, <4 x i32> %wide.load
%index.next = add i32 %index, 4
@@ -1333,8 +1333,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %6 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %6 = load i32, ptr %arrayidx, align 4
%c = icmp ugt i32 %r.07, %6
%add = select i1 %c, i32 %r.07, i32 %6
%inc = add nuw nsw i32 %i.08, 1
@@ -1346,7 +1346,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define i32 @umax_i32_inloop(i32* nocapture readonly %x, i32 %n) {
+define i32 @umax_i32_inloop(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: umax_i32_inloop:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1406,9 +1406,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x i32>, ptr %1, align 4
%l5 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %wide.load)
%2 = icmp ugt i32 %vec.phi, %l5
%3 = select i1 %2, i32 %vec.phi, i32 %l5
@@ -1429,8 +1429,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08
- %6 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08
+ %6 = load i32, ptr %arrayidx, align 4
%c = icmp ugt i32 %r.07, %6
%add = select i1 %c, i32 %r.07, i32 %6
%inc = add nuw nsw i32 %i.08, 1
@@ -1442,7 +1442,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret i32 %r.0.lcssa
}
-define float @fmin_f32(float* nocapture readonly %x, i32 %n) {
+define float @fmin_f32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: fmin_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1511,9 +1511,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x float> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds float, float* %x, i32 %index
- %1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>, <4 x float>* %1, align 4
+ %0 = getelementptr inbounds float, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x float>, ptr %1, align 4
%2 = fcmp ult <4 x float> %vec.phi, %wide.load
%3 = select <4 x i1> %2, <4 x float> %vec.phi, <4 x float> %wide.load
%index.next = add i32 %index, 4
@@ -1533,8 +1533,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi float [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds float, float* %x, i32 %i.08
- %6 = load float, float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.08
+ %6 = load float, ptr %arrayidx, align 4
%c = fcmp ult float %r.07, %6
%add = select i1 %c, float %r.07, float %6
%inc = add nuw nsw i32 %i.08, 1
@@ -1546,7 +1546,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret float %r.0.lcssa
}
-define float @fmax_f32(float* nocapture readonly %x, i32 %n) {
+define float @fmax_f32(ptr nocapture readonly %x, i32 %n) {
; CHECK-LABEL: fmax_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1615,9 +1615,9 @@ vector.ph: ; preds = %for.body.preheader
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x float> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ]
- %0 = getelementptr inbounds float, float* %x, i32 %index
- %1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>, <4 x float>* %1, align 4
+ %0 = getelementptr inbounds float, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.load = load <4 x float>, ptr %1, align 4
%2 = fcmp ugt <4 x float> %vec.phi, %wide.load
%3 = select <4 x i1> %2, <4 x float> %vec.phi, <4 x float> %wide.load
%index.next = add i32 %index, 4
@@ -1637,8 +1637,8 @@ for.body.preheader1: ; preds = %middle.block, %for.
for.body: ; preds = %for.body.preheader1, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ]
%r.07 = phi float [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ]
- %arrayidx = getelementptr inbounds float, float* %x, i32 %i.08
- %6 = load float, float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.08
+ %6 = load float, ptr %arrayidx, align 4
%c = fcmp ugt float %r.07, %6
%add = select i1 %c, float %r.07, float %6
%inc = add nuw nsw i32 %i.08, 1
@@ -1650,7 +1650,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
ret float %r.0.lcssa
}
-define i32 @add4i32(i32* noalias nocapture readonly %x, i32 %n) {
+define i32 @add4i32(ptr noalias nocapture readonly %x, i32 %n) {
; CHECK-LABEL: add4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1684,9 +1684,9 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %vector.ph ], [ %4, %vector.body ]
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer
%3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2)
%4 = add i32 %3, %vec.phi
@@ -1699,7 +1699,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i32 %s.0.lcssa
}
-define i32 @mla4i32(i32* noalias nocapture readonly %x, i32* noalias nocapture readonly %y, i32 %n) {
+define i32 @mla4i32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) {
; CHECK-LABEL: mla4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1734,12 +1734,12 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %vector.ph ], [ %7, %vector.body ]
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
- %2 = getelementptr inbounds i32, i32* %y, i32 %index
- %3 = bitcast i32* %2 to <4 x i32>*
- %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %2 = getelementptr inbounds i32, ptr %y, i32 %index
+ %3 = bitcast ptr %2 to ptr
+ %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%4 = mul nsw <4 x i32> %wide.masked.load13, %wide.masked.load
%5 = select <4 x i1> %active.lane.mask, <4 x i32> %4, <4 x i32> zeroinitializer
%6 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %5)
@@ -1753,7 +1753,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i32 %s.0.lcssa
}
-define i32 @add8i32(i16* noalias nocapture readonly %x, i32 %n) {
+define i32 @add8i32(ptr noalias nocapture readonly %x, i32 %n) {
; CHECK-LABEL: add8i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1787,9 +1787,9 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %vector.ph ], [ %5, %vector.body ]
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i16, i16* %x, i32 %index
- %1 = bitcast i16* %0 to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
+ %0 = getelementptr inbounds i16, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
%2 = sext <8 x i16> %wide.masked.load to <8 x i32>
%3 = select <8 x i1> %active.lane.mask, <8 x i32> %2, <8 x i32> zeroinitializer
%4 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %3)
@@ -1803,7 +1803,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i32 %s.0.lcssa
}
-define i32 @mla8i32(i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y, i32 %n) {
+define i32 @mla8i32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) {
; CHECK-LABEL: mla8i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1838,13 +1838,13 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %vector.ph ], [ %9, %vector.body ]
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i16, i16* %x, i32 %index
- %1 = bitcast i16* %0 to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
+ %0 = getelementptr inbounds i16, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
%2 = sext <8 x i16> %wide.masked.load to <8 x i32>
- %3 = getelementptr inbounds i16, i16* %y, i32 %index
- %4 = bitcast i16* %3 to <8 x i16>*
- %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %4, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
+ %3 = getelementptr inbounds i16, ptr %y, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %4, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
%5 = sext <8 x i16> %wide.masked.load14 to <8 x i32>
%6 = mul nsw <8 x i32> %5, %2
%7 = select <8 x i1> %active.lane.mask, <8 x i32> %6, <8 x i32> zeroinitializer
@@ -1859,7 +1859,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i32 %s.0.lcssa
}
-define i32 @add16i32(i8* noalias nocapture readonly %x, i32 %n) {
+define i32 @add16i32(ptr noalias nocapture readonly %x, i32 %n) {
; CHECK-LABEL: add16i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1893,9 +1893,9 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %vector.ph ], [ %5, %vector.body ]
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i8, i8* %x, i32 %index
- %1 = bitcast i8* %0 to <16 x i8>*
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %0 = getelementptr inbounds i8, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
%2 = zext <16 x i8> %wide.masked.load to <16 x i32>
%3 = select <16 x i1> %active.lane.mask, <16 x i32> %2, <16 x i32> zeroinitializer
%4 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %3)
@@ -1909,7 +1909,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i32 %s.0.lcssa
}
-define i32 @mla16i32(i8* noalias nocapture readonly %x, i8* noalias nocapture readonly %y, i32 %n) {
+define i32 @mla16i32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) {
; CHECK-LABEL: mla16i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1944,13 +1944,13 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i32 [ 0, %vector.ph ], [ %9, %vector.body ]
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i8, i8* %x, i32 %index
- %1 = bitcast i8* %0 to <16 x i8>*
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %0 = getelementptr inbounds i8, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
%2 = zext <16 x i8> %wide.masked.load to <16 x i32>
- %3 = getelementptr inbounds i8, i8* %y, i32 %index
- %4 = bitcast i8* %3 to <16 x i8>*
- %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %4, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %3 = getelementptr inbounds i8, ptr %y, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %4, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
%5 = zext <16 x i8> %wide.masked.load14 to <16 x i32>
%6 = mul nuw nsw <16 x i32> %5, %2
%7 = select <16 x i1> %active.lane.mask, <16 x i32> %6, <16 x i32> zeroinitializer
@@ -1965,7 +1965,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i32 %s.0.lcssa
}
-define signext i16 @add8i16(i16* noalias nocapture readonly %x, i32 %n) {
+define signext i16 @add8i16(ptr noalias nocapture readonly %x, i32 %n) {
; CHECK-LABEL: add8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -1999,9 +1999,9 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i16 [ 0, %vector.ph ], [ %4, %vector.body ]
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i16, i16* %x, i32 %index
- %1 = bitcast i16* %0 to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
+ %0 = getelementptr inbounds i16, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
%2 = select <8 x i1> %active.lane.mask, <8 x i16> %wide.masked.load, <8 x i16> zeroinitializer
%3 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %2)
%4 = add i16 %3, %vec.phi
@@ -2014,7 +2014,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i16 %s.0.lcssa
}
-define signext i16 @mla8i16(i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y, i32 %n) {
+define signext i16 @mla8i16(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) {
; CHECK-LABEL: mla8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -2049,12 +2049,12 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i16 [ 0, %vector.ph ], [ %7, %vector.body ]
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i16, i16* %x, i32 %index
- %1 = bitcast i16* %0 to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
- %2 = getelementptr inbounds i16, i16* %y, i32 %index
- %3 = bitcast i16* %2 to <8 x i16>*
- %wide.masked.load16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %3, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
+ %0 = getelementptr inbounds i16, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
+ %2 = getelementptr inbounds i16, ptr %y, i32 %index
+ %3 = bitcast ptr %2 to ptr
+ %wide.masked.load16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %3, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
%4 = mul <8 x i16> %wide.masked.load16, %wide.masked.load
%5 = select <8 x i1> %active.lane.mask, <8 x i16> %4, <8 x i16> zeroinitializer
%6 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %5)
@@ -2068,7 +2068,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i16 %s.0.lcssa
}
-define signext i16 @add16i16(i8* noalias nocapture readonly %x, i32 %n) {
+define signext i16 @add16i16(ptr noalias nocapture readonly %x, i32 %n) {
; CHECK-LABEL: add16i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -2102,9 +2102,9 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i16 [ 0, %vector.ph ], [ %5, %vector.body ]
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i8, i8* %x, i32 %index
- %1 = bitcast i8* %0 to <16 x i8>*
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %0 = getelementptr inbounds i8, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
%2 = zext <16 x i8> %wide.masked.load to <16 x i16>
%3 = select <16 x i1> %active.lane.mask, <16 x i16> %2, <16 x i16> zeroinitializer
%4 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %3)
@@ -2118,7 +2118,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i16 %s.0.lcssa
}
-define signext i16 @mla16i16(i8* noalias nocapture readonly %x, i8* noalias nocapture readonly %y, i32 %n) {
+define signext i16 @mla16i16(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) {
; CHECK-LABEL: mla16i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -2153,13 +2153,13 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i16 [ 0, %vector.ph ], [ %9, %vector.body ]
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i8, i8* %x, i32 %index
- %1 = bitcast i8* %0 to <16 x i8>*
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %0 = getelementptr inbounds i8, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
%2 = zext <16 x i8> %wide.masked.load to <16 x i16>
- %3 = getelementptr inbounds i8, i8* %y, i32 %index
- %4 = bitcast i8* %3 to <16 x i8>*
- %wide.masked.load18 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %4, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %3 = getelementptr inbounds i8, ptr %y, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ %wide.masked.load18 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %4, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
%5 = zext <16 x i8> %wide.masked.load18 to <16 x i16>
%6 = mul nuw <16 x i16> %5, %2
%7 = select <16 x i1> %active.lane.mask, <16 x i16> %6, <16 x i16> zeroinitializer
@@ -2174,7 +2174,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i16 %s.0.lcssa
}
-define zeroext i8 @add16i8(i8* noalias nocapture readonly %x, i32 %n) {
+define zeroext i8 @add16i8(ptr noalias nocapture readonly %x, i32 %n) {
; CHECK-LABEL: add16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -2208,9 +2208,9 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i8 [ 0, %vector.ph ], [ %4, %vector.body ]
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i8, i8* %x, i32 %index
- %1 = bitcast i8* %0 to <16 x i8>*
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %0 = getelementptr inbounds i8, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
%2 = select <16 x i1> %active.lane.mask, <16 x i8> %wide.masked.load, <16 x i8> zeroinitializer
%3 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %2)
%4 = add i8 %3, %vec.phi
@@ -2223,7 +2223,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i8 %s.0.lcssa
}
-define zeroext i8 @mla16i8(i8* noalias nocapture readonly %x, i8* noalias nocapture readonly %y, i32 %n) {
+define zeroext i8 @mla16i8(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) {
; CHECK-LABEL: mla16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -2258,12 +2258,12 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i8 [ 0, %vector.ph ], [ %7, %vector.body ]
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i8, i8* %x, i32 %index
- %1 = bitcast i8* %0 to <16 x i8>*
- %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
- %2 = getelementptr inbounds i8, i8* %y, i32 %index
- %3 = bitcast i8* %2 to <16 x i8>*
- %wide.masked.load15 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %0 = getelementptr inbounds i8, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
+ %2 = getelementptr inbounds i8, ptr %y, i32 %index
+ %3 = bitcast ptr %2 to ptr
+ %wide.masked.load15 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
%4 = mul <16 x i8> %wide.masked.load15, %wide.masked.load
%5 = select <16 x i1> %active.lane.mask, <16 x i8> %4, <16 x i8> zeroinitializer
%6 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %5)
@@ -2277,7 +2277,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i8 %s.0.lcssa
}
-define i64 @add4i64(i32* noalias nocapture readonly %x, i32 %n) {
+define i64 @add4i64(ptr noalias nocapture readonly %x, i32 %n) {
; CHECK-LABEL: add4i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -2313,9 +2313,9 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i64 [ 0, %vector.ph ], [ %5, %vector.body ]
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%2 = sext <4 x i32> %wide.masked.load to <4 x i64>
%3 = select <4 x i1> %active.lane.mask, <4 x i64> %2, <4 x i64> zeroinitializer
%4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %3)
@@ -2329,7 +2329,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i64 %s.0.lcssa
}
-define i64 @mla4i64(i32* noalias nocapture readonly %x, i32* noalias nocapture readonly %y, i32 %n) {
+define i64 @mla4i64(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) {
; CHECK-LABEL: mla4i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -2366,13 +2366,13 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i64 [ 0, %vector.ph ], [ %9, %vector.body ]
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i32, i32* %x, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %0 = getelementptr inbounds i32, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%2 = sext <4 x i32> %wide.masked.load to <4 x i64>
- %3 = getelementptr inbounds i32, i32* %y, i32 %index
- %4 = bitcast i32* %3 to <4 x i32>*
- %wide.masked.load14 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %4, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %3 = getelementptr inbounds i32, ptr %y, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ %wide.masked.load14 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %4, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%5 = sext <4 x i32> %wide.masked.load14 to <4 x i64>
%6 = mul nsw <4 x i64> %5, %2
%7 = select <4 x i1> %active.lane.mask, <4 x i64> %6, <4 x i64> zeroinitializer
@@ -2387,7 +2387,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret i64 %s.0.lcssa
}
-define i64 @mla8i64(i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y, i32 %n) {
+define i64 @mla8i64(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) {
; CHECK-LABEL: mla8i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -2424,13 +2424,13 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi i64 [ 0, %vector.ph ], [ %9, %vector.body ]
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i16, i16* %x, i32 %index
- %1 = bitcast i16* %0 to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
+ %0 = getelementptr inbounds i16, ptr %x, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
%2 = sext <8 x i16> %wide.masked.load to <8 x i64>
- %3 = getelementptr inbounds i16, i16* %y, i32 %index
- %4 = bitcast i16* %3 to <8 x i16>*
- %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %4, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
+ %3 = getelementptr inbounds i16, ptr %y, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %4, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
%5 = sext <8 x i16> %wide.masked.load14 to <8 x i64>
%6 = mul nsw <8 x i64> %5, %2
%7 = select <8 x i1> %active.lane.mask, <8 x i64> %6, <8 x i64> zeroinitializer
@@ -2446,12 +2446,12 @@ for.cond.cleanup: ; preds = %vector.body, %entry
}
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #1
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) #1
-declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) #2
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) #2
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) #3
declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32) #1
-declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #2
+declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) #2
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) #3
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #3
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) #3
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
index e6cb00273a273..04be18e3dd873 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
@@ -191,7 +191,7 @@ define arm_aapcs_vfpcc <8 x half> @minpredf16_c(<8 x half> %a, <8 x half> %b) {
; Loops
-define void @loop_absmax32(float* nocapture readonly %0, i32 %1, float* nocapture %2) {
+define void @loop_absmax32(ptr nocapture readonly %0, i32 %1, ptr nocapture %2) {
; CHECK-LABEL: loop_absmax32:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
@@ -224,10 +224,10 @@ define void @loop_absmax32(float* nocapture readonly %0, i32 %1, float* nocaptur
6: ; preds = %3, %6
%7 = phi i32 [ %16, %6 ], [ %4, %3 ]
%8 = phi <4 x float> [ %15, %6 ], [ zeroinitializer, %3 ]
- %9 = phi float* [ %12, %6 ], [ %0, %3 ]
- %10 = bitcast float* %9 to <4 x float>*
- %11 = load <4 x float>, <4 x float>* %10, align 4
- %12 = getelementptr inbounds float, float* %9, i32 4
+ %9 = phi ptr [ %12, %6 ], [ %0, %3 ]
+ %10 = bitcast ptr %9 to ptr
+ %11 = load <4 x float>, ptr %10, align 4
+ %12 = getelementptr inbounds float, ptr %9, i32 4
%13 = tail call fast <4 x float> @llvm.fabs.v4f32(<4 x float> %11)
%14 = tail call fast <4 x float> @llvm.fabs.v4f32(<4 x float> %8)
%15 = tail call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %14, <4 x float> %13)
@@ -238,11 +238,11 @@ define void @loop_absmax32(float* nocapture readonly %0, i32 %1, float* nocaptur
18: ; preds = %6, %3
%19 = phi <4 x float> [ zeroinitializer, %3 ], [ %15, %6 ]
%20 = tail call fast float @llvm.arm.mve.maxnmav.f32.v4f32(float 0.000000e+00, <4 x float> %19)
- store float %20, float* %2, align 4
+ store float %20, ptr %2, align 4
ret void
}
-define void @loop_absmax32_c(float* nocapture readonly %0, i32 %1, float* nocapture %2) {
+define void @loop_absmax32_c(ptr nocapture readonly %0, i32 %1, ptr nocapture %2) {
; CHECK-LABEL: loop_absmax32_c:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
@@ -275,10 +275,10 @@ define void @loop_absmax32_c(float* nocapture readonly %0, i32 %1, float* nocapt
6: ; preds = %3, %6
%7 = phi i32 [ %16, %6 ], [ %4, %3 ]
%8 = phi <4 x float> [ %15, %6 ], [ zeroinitializer, %3 ]
- %9 = phi float* [ %12, %6 ], [ %0, %3 ]
- %10 = bitcast float* %9 to <4 x float>*
- %11 = load <4 x float>, <4 x float>* %10, align 4
- %12 = getelementptr inbounds float, float* %9, i32 4
+ %9 = phi ptr [ %12, %6 ], [ %0, %3 ]
+ %10 = bitcast ptr %9 to ptr
+ %11 = load <4 x float>, ptr %10, align 4
+ %12 = getelementptr inbounds float, ptr %9, i32 4
%13 = tail call fast <4 x float> @llvm.fabs.v4f32(<4 x float> %11)
%14 = tail call fast <4 x float> @llvm.fabs.v4f32(<4 x float> %8)
%15 = tail call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %13, <4 x float> %14)
@@ -289,11 +289,11 @@ define void @loop_absmax32_c(float* nocapture readonly %0, i32 %1, float* nocapt
18: ; preds = %6, %3
%19 = phi <4 x float> [ zeroinitializer, %3 ], [ %15, %6 ]
%20 = tail call fast float @llvm.arm.mve.maxnmav.f32.v4f32(float 0.000000e+00, <4 x float> %19)
- store float %20, float* %2, align 4
+ store float %20, ptr %2, align 4
ret void
}
-define void @loop_absmax32_pred(float* %0, i32 %1, float* nocapture %2) {
+define void @loop_absmax32_pred(ptr %0, i32 %1, ptr nocapture %2) {
; CHECK-LABEL: loop_absmax32_pred:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
@@ -320,11 +320,11 @@ define void @loop_absmax32_pred(float* %0, i32 %1, float* nocapture %2) {
4: ; preds = %4, %3
%5 = phi <4 x float> [ zeroinitializer, %3 ], [ %12, %4 ]
%6 = phi i32 [ %1, %3 ], [ %13, %4 ]
- %7 = phi float* [ %0, %3 ], [ %11, %4 ]
+ %7 = phi ptr [ %0, %3 ], [ %11, %4 ]
%8 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %6)
- %9 = bitcast float* %7 to <4 x float>*
- %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %9, i32 4, <4 x i1> %8, <4 x float> zeroinitializer)
- %11 = getelementptr inbounds float, float* %7, i32 4
+ %9 = bitcast ptr %7 to ptr
+ %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %9, i32 4, <4 x i1> %8, <4 x float> zeroinitializer)
+ %11 = getelementptr inbounds float, ptr %7, i32 4
%12 = tail call fast <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> %5, <4 x float> %10, <4 x i1> %8)
%13 = add nsw i32 %6, -4
%14 = icmp sgt i32 %6, 4
@@ -332,11 +332,11 @@ define void @loop_absmax32_pred(float* %0, i32 %1, float* nocapture %2) {
15: ; preds = %4
%16 = tail call fast float @llvm.arm.mve.maxnmav.f32.v4f32(float 0.000000e+00, <4 x float> %12)
- store float %16, float* %2, align 4
+ store float %16, ptr %2, align 4
ret void
}
-define void @loop_absmax32_pred_c(float* %0, i32 %1, float* nocapture %2) {
+define void @loop_absmax32_pred_c(ptr %0, i32 %1, ptr nocapture %2) {
; CHECK-LABEL: loop_absmax32_pred_c:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
@@ -364,11 +364,11 @@ define void @loop_absmax32_pred_c(float* %0, i32 %1, float* nocapture %2) {
4: ; preds = %4, %3
%5 = phi <4 x float> [ zeroinitializer, %3 ], [ %12, %4 ]
%6 = phi i32 [ %1, %3 ], [ %13, %4 ]
- %7 = phi float* [ %0, %3 ], [ %11, %4 ]
+ %7 = phi ptr [ %0, %3 ], [ %11, %4 ]
%8 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %6)
- %9 = bitcast float* %7 to <4 x float>*
- %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %9, i32 4, <4 x i1> %8, <4 x float> zeroinitializer)
- %11 = getelementptr inbounds float, float* %7, i32 4
+ %9 = bitcast ptr %7 to ptr
+ %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %9, i32 4, <4 x i1> %8, <4 x float> zeroinitializer)
+ %11 = getelementptr inbounds float, ptr %7, i32 4
%12 = tail call fast <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> %10, <4 x float> %5, <4 x i1> %8)
%13 = add nsw i32 %6, -4
%14 = icmp sgt i32 %6, 4
@@ -376,7 +376,7 @@ define void @loop_absmax32_pred_c(float* %0, i32 %1, float* nocapture %2) {
15: ; preds = %4
%16 = tail call fast float @llvm.arm.mve.maxnmav.f32.v4f32(float 0.000000e+00, <4 x float> %12)
- store float %16, float* %2, align 4
+ store float %16, ptr %2, align 4
ret void
}
@@ -385,7 +385,7 @@ define void @loop_absmax32_pred_c(float* %0, i32 %1, float* nocapture %2) {
-define void @loop_absmax16(half* nocapture readonly %0, i32 %1, half* nocapture %2) {
+define void @loop_absmax16(ptr nocapture readonly %0, i32 %1, ptr nocapture %2) {
; CHECK-LABEL: loop_absmax16:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
@@ -418,10 +418,10 @@ define void @loop_absmax16(half* nocapture readonly %0, i32 %1, half* nocapture
6: ; preds = %3, %6
%7 = phi i32 [ %16, %6 ], [ %4, %3 ]
%8 = phi <8 x half> [ %15, %6 ], [ zeroinitializer, %3 ]
- %9 = phi half* [ %12, %6 ], [ %0, %3 ]
- %10 = bitcast half* %9 to <8 x half>*
- %11 = load <8 x half>, <8 x half>* %10, align 4
- %12 = getelementptr inbounds half, half* %9, i32 4
+ %9 = phi ptr [ %12, %6 ], [ %0, %3 ]
+ %10 = bitcast ptr %9 to ptr
+ %11 = load <8 x half>, ptr %10, align 4
+ %12 = getelementptr inbounds half, ptr %9, i32 4
%13 = tail call fast <8 x half> @llvm.fabs.v8f16(<8 x half> %11)
%14 = tail call fast <8 x half> @llvm.fabs.v8f16(<8 x half> %8)
%15 = tail call fast <8 x half> @llvm.maxnum.v8f16(<8 x half> %14, <8 x half> %13)
@@ -432,11 +432,11 @@ define void @loop_absmax16(half* nocapture readonly %0, i32 %1, half* nocapture
18: ; preds = %6, %3
%19 = phi <8 x half> [ zeroinitializer, %3 ], [ %15, %6 ]
%20 = tail call fast half @llvm.arm.mve.maxnmav.f16.v8f16(half 0.000000e+00, <8 x half> %19)
- store half %20, half* %2, align 4
+ store half %20, ptr %2, align 4
ret void
}
-define void @loop_absmax16_c(half* nocapture readonly %0, i32 %1, half* nocapture %2) {
+define void @loop_absmax16_c(ptr nocapture readonly %0, i32 %1, ptr nocapture %2) {
; CHECK-LABEL: loop_absmax16_c:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
@@ -469,10 +469,10 @@ define void @loop_absmax16_c(half* nocapture readonly %0, i32 %1, half* nocaptur
6: ; preds = %3, %6
%7 = phi i32 [ %16, %6 ], [ %4, %3 ]
%8 = phi <8 x half> [ %15, %6 ], [ zeroinitializer, %3 ]
- %9 = phi half* [ %12, %6 ], [ %0, %3 ]
- %10 = bitcast half* %9 to <8 x half>*
- %11 = load <8 x half>, <8 x half>* %10, align 4
- %12 = getelementptr inbounds half, half* %9, i32 4
+ %9 = phi ptr [ %12, %6 ], [ %0, %3 ]
+ %10 = bitcast ptr %9 to ptr
+ %11 = load <8 x half>, ptr %10, align 4
+ %12 = getelementptr inbounds half, ptr %9, i32 4
%13 = tail call fast <8 x half> @llvm.fabs.v8f16(<8 x half> %11)
%14 = tail call fast <8 x half> @llvm.fabs.v8f16(<8 x half> %8)
%15 = tail call fast <8 x half> @llvm.maxnum.v8f16(<8 x half> %13, <8 x half> %14)
@@ -483,11 +483,11 @@ define void @loop_absmax16_c(half* nocapture readonly %0, i32 %1, half* nocaptur
18: ; preds = %6, %3
%19 = phi <8 x half> [ zeroinitializer, %3 ], [ %15, %6 ]
%20 = tail call fast half @llvm.arm.mve.maxnmav.f16.v8f16(half 0.000000e+00, <8 x half> %19)
- store half %20, half* %2, align 4
+ store half %20, ptr %2, align 4
ret void
}
-define void @loop_absmax16_pred(half* %0, i32 %1, half* nocapture %2) {
+define void @loop_absmax16_pred(ptr %0, i32 %1, ptr nocapture %2) {
; CHECK-LABEL: loop_absmax16_pred:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
@@ -514,11 +514,11 @@ define void @loop_absmax16_pred(half* %0, i32 %1, half* nocapture %2) {
4: ; preds = %4, %3
%5 = phi <8 x half> [ zeroinitializer, %3 ], [ %12, %4 ]
%6 = phi i32 [ %1, %3 ], [ %13, %4 ]
- %7 = phi half* [ %0, %3 ], [ %11, %4 ]
+ %7 = phi ptr [ %0, %3 ], [ %11, %4 ]
%8 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %6)
- %9 = bitcast half* %7 to <8 x half>*
- %10 = tail call fast <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %9, i32 4, <8 x i1> %8, <8 x half> zeroinitializer)
- %11 = getelementptr inbounds half, half* %7, i32 4
+ %9 = bitcast ptr %7 to ptr
+ %10 = tail call fast <8 x half> @llvm.masked.load.v8f16.p0(ptr %9, i32 4, <8 x i1> %8, <8 x half> zeroinitializer)
+ %11 = getelementptr inbounds half, ptr %7, i32 4
%12 = tail call fast <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> %5, <8 x half> %10, <8 x i1> %8)
%13 = add nsw i32 %6, -8
%14 = icmp sgt i32 %6, 8
@@ -526,11 +526,11 @@ define void @loop_absmax16_pred(half* %0, i32 %1, half* nocapture %2) {
15: ; preds = %4
%16 = tail call fast half @llvm.arm.mve.maxnmav.f16.v8f16(half 0.000000e+00, <8 x half> %12)
- store half %16, half* %2, align 4
+ store half %16, ptr %2, align 4
ret void
}
-define void @loop_absmax16_pred_c(half* %0, i32 %1, half* nocapture %2) {
+define void @loop_absmax16_pred_c(ptr %0, i32 %1, ptr nocapture %2) {
; CHECK-LABEL: loop_absmax16_pred_c:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
@@ -558,11 +558,11 @@ define void @loop_absmax16_pred_c(half* %0, i32 %1, half* nocapture %2) {
4: ; preds = %4, %3
%5 = phi <8 x half> [ zeroinitializer, %3 ], [ %12, %4 ]
%6 = phi i32 [ %1, %3 ], [ %13, %4 ]
- %7 = phi half* [ %0, %3 ], [ %11, %4 ]
+ %7 = phi ptr [ %0, %3 ], [ %11, %4 ]
%8 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %6)
- %9 = bitcast half* %7 to <8 x half>*
- %10 = tail call fast <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %9, i32 4, <8 x i1> %8, <8 x half> zeroinitializer)
- %11 = getelementptr inbounds half, half* %7, i32 4
+ %9 = bitcast ptr %7 to ptr
+ %10 = tail call fast <8 x half> @llvm.masked.load.v8f16.p0(ptr %9, i32 4, <8 x i1> %8, <8 x half> zeroinitializer)
+ %11 = getelementptr inbounds half, ptr %7, i32 4
%12 = tail call fast <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> %10, <8 x half> %5, <8 x i1> %8)
%13 = add nsw i32 %6, -8
%14 = icmp sgt i32 %6, 8
@@ -570,7 +570,7 @@ define void @loop_absmax16_pred_c(half* %0, i32 %1, half* nocapture %2) {
15: ; preds = %4
%16 = tail call fast half @llvm.arm.mve.maxnmav.f16.v8f16(half 0.000000e+00, <8 x half> %12)
- store half %16, half* %2, align 4
+ store half %16, ptr %2, align 4
ret void
}
@@ -579,7 +579,7 @@ define void @loop_absmax16_pred_c(half* %0, i32 %1, half* nocapture %2) {
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>)
declare <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>)
declare <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>)
declare float @llvm.arm.mve.maxnmav.f32.v4f32(float, <4 x float>)
@@ -588,7 +588,7 @@ declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
-declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32 immarg, <8 x i1>, <8 x half>)
+declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32 immarg, <8 x i1>, <8 x half>)
declare <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>)
declare <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>)
declare half @llvm.arm.mve.maxnmav.f16.v8f16(half, <8 x half>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll
index ac4abdbf45643..f51f6c0a1f0eb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
-define void @vmovl_s32(i32* noalias nocapture %d, i32* nocapture readonly %s, i32 %n) {
+define void @vmovl_s32(ptr noalias nocapture %d, ptr nocapture readonly %s, i32 %n) {
; CHECK-LABEL: vmovl_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -31,14 +31,14 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i32, i32* %s, i32 %index
- %1 = bitcast i32* %0 to <4 x i32>*
- %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> poison)
+ %0 = getelementptr inbounds i32, ptr %s, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> poison)
%2 = shl <4 x i32> %wide.masked.load, <i32 16, i32 16, i32 16, i32 16>
%3 = ashr exact <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16>
- %4 = getelementptr inbounds i32, i32* %d, i32 %index
- %5 = bitcast i32* %4 to <4 x i32>*
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %5, i32 4, <4 x i1> %active.lane.mask)
+ %4 = getelementptr inbounds i32, ptr %d, i32 %index
+ %5 = bitcast ptr %4 to ptr
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %5, i32 4, <4 x i1> %active.lane.mask)
%index.next = add i32 %index, 4
%6 = icmp eq i32 %index.next, %n.vec
br i1 %6, label %for.cond.cleanup, label %vector.body
@@ -48,7 +48,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
}
-define void @vmovl_u16(i16* noalias nocapture %d, i16* nocapture readonly %s, i32 %n) {
+define void @vmovl_u16(ptr noalias nocapture %d, ptr nocapture readonly %s, i32 %n) {
; CHECK-LABEL: vmovl_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -78,13 +78,13 @@ vector.ph: ; preds = %entry
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n)
- %0 = getelementptr inbounds i16, i16* %s, i32 %index
- %1 = bitcast i16* %0 to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison)
+ %0 = getelementptr inbounds i16, ptr %s, i32 %index
+ %1 = bitcast ptr %0 to ptr
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison)
%2 = and <8 x i16> %wide.masked.load, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
- %3 = getelementptr inbounds i16, i16* %d, i32 %index
- %4 = bitcast i16* %3 to <8 x i16>*
- call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %2, <8 x i16>* %4, i32 2, <8 x i1> %active.lane.mask)
+ %3 = getelementptr inbounds i16, ptr %d, i32 %index
+ %4 = bitcast ptr %3 to ptr
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> %2, ptr %4, i32 2, <8 x i1> %active.lane.mask)
%index.next = add i32 %index, 8
%5 = icmp eq i32 %index.next, %n.vec
br i1 %5, label %for.cond.cleanup, label %vector.body
@@ -93,7 +93,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
-define void @vmovl_16to32(i16* %d, i16* %s, i32 %n) {
+define void @vmovl_16to32(ptr %d, ptr %s, i32 %n) {
; CHECK-LABEL: vmovl_16to32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -131,25 +131,25 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
- %d.addr.016 = phi i16* [ %add.ptr3, %for.body ], [ %d, %entry ]
- %s.addr.015 = phi i16* [ %add.ptr, %for.body ], [ %s, %entry ]
+ %d.addr.016 = phi ptr [ %add.ptr3, %for.body ], [ %d, %entry ]
+ %s.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %s, %entry ]
%i.014 = phi i32 [ %sub, %for.body ], [ %n, %entry ]
%0 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %i.014)
- %1 = bitcast i16* %s.addr.015 to <8 x i16>*
- %2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %0, <8 x i16> <i16 0, i16 poison, i16 0, i16 poison, i16 0, i16 poison, i16 0, i16 poison>)
- %add.ptr = getelementptr inbounds i16, i16* %s.addr.015, i32 8
+ %1 = bitcast ptr %s.addr.015 to ptr
+ %2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %0, <8 x i16> <i16 0, i16 poison, i16 0, i16 poison, i16 0, i16 poison, i16 0, i16 poison>)
+ %add.ptr = getelementptr inbounds i16, ptr %s.addr.015, i32 8
%3 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%4 = sext <4 x i16> %3 to <4 x i32>
%5 = bitcast <4 x i32> %4 to <8 x i16>
- %6 = bitcast i16* %d.addr.016 to <8 x i16>*
- tail call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %5, <8 x i16>* %6, i32 2, <8 x i1> %0)
- %add.ptr3 = getelementptr inbounds i16, i16* %d.addr.016, i32 8
+ %6 = bitcast ptr %d.addr.016 to ptr
+ tail call void @llvm.masked.store.v8i16.p0(<8 x i16> %5, ptr %6, i32 2, <8 x i1> %0)
+ %add.ptr3 = getelementptr inbounds i16, ptr %d.addr.016, i32 8
%sub = add nsw i32 %i.014, -8
%cmp = icmp sgt i32 %i.014, 8
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
-define void @sunken_vmovl(i8* noalias %pTarget, i16 signext %iTargetStride, i8* noalias %pchAlpha, i16 signext %iAlphaStride, i16 %0, i8 zeroext %Colour) {
+define void @sunken_vmovl(ptr noalias %pTarget, i16 signext %iTargetStride, ptr noalias %pchAlpha, i16 signext %iAlphaStride, i16 %0, i8 zeroext %Colour) {
; CHECK-LABEL: sunken_vmovl:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -177,34 +177,34 @@ define void @sunken_vmovl(i8* noalias %pTarget, i16 signext %iTargetStride, i8*
entry:
%conv3 = sext i16 %0 to i32
%1 = zext i8 %Colour to i32
- %2 = bitcast i8* %pTarget to <8 x i8>*
- %3 = load <8 x i8>, <8 x i8>* %2, align 1
- %4 = bitcast i8* %pchAlpha to <8 x i8>*
- %5 = load <8 x i8>, <8 x i8>* %4, align 1
+ %2 = bitcast ptr %pTarget to ptr
+ %3 = load <8 x i8>, ptr %2, align 1
+ %4 = bitcast ptr %pchAlpha to ptr
+ %5 = load <8 x i8>, ptr %4, align 1
br label %do.body
do.body: ; preds = %do.body, %entry
- %pchAlpha.addr.0.pn = phi i8* [ %pchAlpha, %entry ], [ %pAlpha.0, %do.body ]
- %pTarget8.0 = phi i8* [ %pTarget, %entry ], [ %add.ptr5, %do.body ]
+ %pchAlpha.addr.0.pn = phi ptr [ %pchAlpha, %entry ], [ %pAlpha.0, %do.body ]
+ %pTarget8.0 = phi ptr [ %pTarget, %entry ], [ %add.ptr5, %do.body ]
%blkCnt.0 = phi i32 [ %conv3, %entry ], [ %sub, %do.body ]
%vecTarget.0.in = phi <8 x i8> [ %3, %entry ], [ %10, %do.body ]
%vecTransp.0.in = phi <8 x i8> [ %5, %entry ], [ %13, %do.body ]
%vecTransp.0 = zext <8 x i8> %vecTransp.0.in to <8 x i16>
%vecTarget.0 = zext <8 x i8> %vecTarget.0.in to <8 x i16>
- %pAlpha.0 = getelementptr inbounds i8, i8* %pchAlpha.addr.0.pn, i32 8
+ %pAlpha.0 = getelementptr inbounds i8, ptr %pchAlpha.addr.0.pn, i32 8
%6 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %blkCnt.0)
%7 = tail call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>, <8 x i16> %vecTransp.0, <8 x i1> %6, <8 x i16> undef)
%8 = tail call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> %vecTarget.0, <8 x i16> %7, <8 x i1> %6, <8 x i16> undef)
- %add.ptr5 = getelementptr inbounds i8, i8* %pTarget8.0, i32 8
- %9 = bitcast i8* %add.ptr5 to <8 x i8>*
- %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* nonnull %9, i32 1, <8 x i1> %6, <8 x i8> zeroinitializer)
+ %add.ptr5 = getelementptr inbounds i8, ptr %pTarget8.0, i32 8
+ %9 = bitcast ptr %add.ptr5 to ptr
+ %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr nonnull %9, i32 1, <8 x i1> %6, <8 x i8> zeroinitializer)
%11 = tail call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> %8, <8 x i16> %vecTransp.0, i32 %1, <8 x i1> %6)
- %12 = bitcast i8* %pAlpha.0 to <8 x i8>*
- %13 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* nonnull %12, i32 1, <8 x i1> %6, <8 x i8> zeroinitializer)
+ %12 = bitcast ptr %pAlpha.0 to ptr
+ %13 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr nonnull %12, i32 1, <8 x i1> %6, <8 x i8> zeroinitializer)
%14 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %11, i32 8, i32 1, <8 x i1> %6, <8 x i16> %11)
%15 = trunc <8 x i16> %14 to <8 x i8>
- %16 = bitcast i8* %pTarget8.0 to <8 x i8>*
- tail call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %15, <8 x i8>* %16, i32 1, <8 x i1> %6)
+ %16 = bitcast ptr %pTarget8.0 to ptr
+ tail call void @llvm.masked.store.v8i8.p0(<8 x i8> %15, ptr %16, i32 1, <8 x i1> %6)
%sub = add nsw i32 %blkCnt.0, -8
%cmp9 = icmp sgt i32 %blkCnt.0, 8
br i1 %cmp9, label %do.body, label %do.end
@@ -214,15 +214,15 @@ do.end: ; preds = %do.body
}
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) #1
-declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) #2
-declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) #3
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) #2
+declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) #3
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #1
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2
-declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2
+declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #3
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>)
declare <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>)
-declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>)
declare <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
-declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32 immarg, <8 x i1>)
+declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32 immarg, <8 x i1>)
diff --git a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
index 68b2cf85be386..5513bed542831 100644
--- a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
+++ b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
@@ -29,42 +29,42 @@
%S = type { [32 x i8] }
- define void @f(%S* %arg) {
+ define void @f(ptr %arg) {
entry:
- %ppp..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -8
- %ppp..sroa_cast248 = bitcast %S* %ppp..sroa_idx to <8 x float>*
- %ppp.copyload = load <8 x float>, <8 x float>* %ppp..sroa_cast248, align 32
+ %ppp..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -8
+ %ppp..sroa_cast248 = bitcast ptr %ppp..sroa_idx to ptr
+ %ppp.copyload = load <8 x float>, ptr %ppp..sroa_cast248, align 32
- %xxx..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -5
- %xxx..sroa_cast248 = bitcast %S* %xxx..sroa_idx to <8 x float>*
- %xxx.copyload = load <8 x float>, <8 x float>* %xxx..sroa_cast248, align 32
+ %xxx..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -5
+ %xxx..sroa_cast248 = bitcast ptr %xxx..sroa_idx to ptr
+ %xxx.copyload = load <8 x float>, ptr %xxx..sroa_cast248, align 32
- %yyy..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -2
- %yyy..sroa_cast244 = bitcast %S* %yyy..sroa_idx to <8 x float>*
- %yyy.copyload = load <8 x float>, <8 x float>* %yyy..sroa_cast244, align 32
+ %yyy..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -2
+ %yyy..sroa_cast244 = bitcast ptr %yyy..sroa_idx to ptr
+ %yyy.copyload = load <8 x float>, ptr %yyy..sroa_cast244, align 32
- %zzz..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -7
- %zzz..sroa_cast241 = bitcast %S* %zzz..sroa_idx to <8 x float>*
- %zzz.copyload = load <8 x float>, <8 x float>* %zzz..sroa_cast241, align 32
+ %zzz..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -7
+ %zzz..sroa_cast241 = bitcast ptr %zzz..sroa_idx to ptr
+ %zzz.copyload = load <8 x float>, ptr %zzz..sroa_cast241, align 32
- %www..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -4
- %www..sroa_cast238 = bitcast %S* %www..sroa_idx to <8 x float>*
- %www.copyload = load <8 x float>, <8 x float>* %www..sroa_cast238, align 32
+ %www..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -4
+ %www..sroa_cast238 = bitcast ptr %www..sroa_idx to ptr
+ %www.copyload = load <8 x float>, ptr %www..sroa_cast238, align 32
- %uuu..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 1
- %uuu..sroa_cast235 = bitcast %S* %uuu..sroa_idx to <8 x float>*
- %uuu.copyload = load <8 x float>, <8 x float>* %uuu..sroa_cast235, align 32
+ %uuu..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 1
+ %uuu..sroa_cast235 = bitcast ptr %uuu..sroa_idx to ptr
+ %uuu.copyload = load <8 x float>, ptr %uuu..sroa_cast235, align 32
- %vvv..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -6
- %vvv..sroa_cast230 = bitcast %S* %vvv..sroa_idx to <8 x float>*
- %vvv.copyload = load <8 x float>, <8 x float>* %vvv..sroa_cast230, align 32
+ %vvv..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -6
+ %vvv..sroa_cast230 = bitcast ptr %vvv..sroa_idx to ptr
+ %vvv.copyload = load <8 x float>, ptr %vvv..sroa_cast230, align 32
- %ttt..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -3
- %ttt..sroa_cast226 = bitcast %S* %ttt..sroa_idx to <8 x float>*
- %ttt.copyload = load <8 x float>, <8 x float>* %ttt..sroa_cast226, align 32
+ %ttt..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -3
+ %ttt..sroa_cast226 = bitcast ptr %ttt..sroa_idx to ptr
+ %ttt.copyload = load <8 x float>, ptr %ttt..sroa_cast226, align 32
- %sss..sroa_cast223 = bitcast %S* %arg to <8 x float>*
- %sss.copyload = load <8 x float>, <8 x float>* %sss..sroa_cast223, align 32
+ %sss..sroa_cast223 = bitcast ptr %arg to ptr
+ %sss.copyload = load <8 x float>, ptr %sss..sroa_cast223, align 32
%mul.i = fmul <8 x float> %ppp.copyload, %www.copyload
%mul.i185 = fmul <8 x float> %xxx.copyload, %uuu.copyload
@@ -75,31 +75,31 @@
%div.i = fdiv <8 x float> zeroinitializer, %add.i
%mul.i153 = fmul <8 x float> %uuu.copyload, %div.i
- store <8 x float> %mul.i153, <8 x float>* %ppp..sroa_cast248, align 32
+ store <8 x float> %mul.i153, ptr %ppp..sroa_cast248, align 32
%mul.i147 = fmul <8 x float> %uuu.copyload, %vvv.copyload
%mul.i141 = fmul <8 x float> %zzz.copyload, %sss.copyload
%mul.i135 = fmul <8 x float> %mul.i141, %div.i
%sub.i129 = fsub <8 x float> %mul.i147, %mul.i135
- store <8 x float> %sub.i129, <8 x float>* %zzz..sroa_cast241, align 32
- store <8 x float> %div.i, <8 x float>* %vvv..sroa_cast230, align 32
- store <8 x float> %div.i, <8 x float>* %xxx..sroa_cast248, align 32
+ store <8 x float> %sub.i129, ptr %zzz..sroa_cast241, align 32
+ store <8 x float> %div.i, ptr %vvv..sroa_cast230, align 32
+ store <8 x float> %div.i, ptr %xxx..sroa_cast248, align 32
%mul.i123 = fmul <8 x float> %yyy.copyload, %vvv.copyload
%mul.i117 = fmul <8 x float> %mul.i123, %div.i
%sub.i111 = fsub <8 x float> %sss.copyload, %mul.i117
- store <8 x float> %sub.i111, <8 x float>* %www..sroa_cast238, align 32
+ store <8 x float> %sub.i111, ptr %www..sroa_cast238, align 32
%mul.i105 = fmul <8 x float> %ppp.copyload, %ttt.copyload
%mul.i99 = fmul <8 x float> %mul.i105, %div.i
%sub.i93 = fsub <8 x float> %xxx.copyload, %mul.i99
- store <8 x float> %sub.i93, <8 x float>* %ttt..sroa_cast226, align 32
+ store <8 x float> %sub.i93, ptr %ttt..sroa_cast226, align 32
%mul.i81 = fmul <8 x float> %yyy.copyload, %www.copyload
%mul.i75 = fmul <8 x float> %mul.i81, %div.i
%sub.i = fsub <8 x float> %mul.i185, %mul.i75
- store <8 x float> %sub.i, <8 x float>* %yyy..sroa_cast244, align 32
+ store <8 x float> %sub.i, ptr %yyy..sroa_cast244, align 32
ret void
}
diff --git a/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir b/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir
index e3b4ec256d285..15fe20c653203 100644
--- a/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir
+++ b/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir
@@ -5,82 +5,82 @@
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8m.main"
- %list_head = type { %list_head*, %list_data* }
+ %list_head = type { ptr, ptr }
%list_data = type { i16, i16 }
- define %list_head* @reg_reg_it_block(%list_head* %a, i16 zeroext %b) {
+ define ptr @reg_reg_it_block(ptr %a, i16 zeroext %b) {
entry:
br label %while.begin
while.begin: ; preds = %while.body.end, %entry
- %list.addr.i = phi %list_head* [ %ld.5, %while.body.end ], [ %a, %entry ]
- %info.i = getelementptr inbounds %list_head, %list_head* %list.addr.i, i32 0, i32 1
- %ld.0 = load %list_data*, %list_data** %info.i, align 4
- %data16.i1 = bitcast %list_data* %ld.0 to i16*
- %ld.1 = load i16, i16* %data16.i1, align 2
+ %list.addr.i = phi ptr [ %ld.5, %while.body.end ], [ %a, %entry ]
+ %info.i = getelementptr inbounds %list_head, ptr %list.addr.i, i32 0, i32 1
+ %ld.0 = load ptr, ptr %info.i, align 4
+ %data16.i1 = bitcast ptr %ld.0 to ptr
+ %ld.1 = load i16, ptr %data16.i1, align 2
%xor.1 = xor i16 %ld.1, %b
%cmp.i = icmp eq i16 %xor.1, 0
br i1 %cmp.i, label %exit, label %while.body.a
while.body.a: ; preds = %while.begin
- %next.i2 = bitcast %list_head* %list.addr.i to %list_head**
- %ld.2 = load %list_head*, %list_head** %next.i2, align 4
- %cmp.i.1 = icmp eq %list_head* %ld.2, null
+ %next.i2 = bitcast ptr %list.addr.i to ptr
+ %ld.2 = load ptr, ptr %next.i2, align 4
+ %cmp.i.1 = icmp eq ptr %ld.2, null
br i1 %cmp.i.1, label %exit, label %it.block
it.block: ; preds = %while.body.a
- %info.i.1 = getelementptr inbounds %list_head, %list_head* %ld.2, i32 0, i32 1
- %ld.3 = load %list_data*, %list_data** %info.i.1, align 4
- %data16.i.13 = bitcast %list_data* %ld.3 to i16*
- %ld.4 = load i16, i16* %data16.i.13, align 2
+ %info.i.1 = getelementptr inbounds %list_head, ptr %ld.2, i32 0, i32 1
+ %ld.3 = load ptr, ptr %info.i.1, align 4
+ %data16.i.13 = bitcast ptr %ld.3 to ptr
+ %ld.4 = load i16, ptr %data16.i.13, align 2
%xor.2 = xor i16 %ld.4, %b
%cmp.i.2 = icmp eq i16 %xor.2, 0
br i1 %cmp.i.2, label %exit, label %while.body.end
while.body.end: ; preds = %it.block
- %next.i.14 = bitcast %list_head* %ld.2 to %list_head**
- %ld.5 = load %list_head*, %list_head** %next.i.14, align 4
- %cmp.i.3 = icmp eq %list_head* %ld.5, null
+ %next.i.14 = bitcast ptr %ld.2 to ptr
+ %ld.5 = load ptr, ptr %next.i.14, align 4
+ %cmp.i.3 = icmp eq ptr %ld.5, null
br i1 %cmp.i.3, label %exit, label %while.begin
exit: ; preds = %while.body.end, %it.block, %while.body.a, %while.begin
- %res = phi %list_head* [ %list.addr.i, %while.begin ], [ %ld.2, %while.body.a ], [ %ld.2, %it.block ], [ %ld.5, %while.body.end ]
- ret %list_head* %res
+ %res = phi ptr [ %list.addr.i, %while.begin ], [ %ld.2, %while.body.a ], [ %ld.2, %it.block ], [ %ld.5, %while.body.end ]
+ ret ptr %res
}
- define i16 @op_not_killed(%list_head* %a, i16 zeroext %b) {
+ define i16 @op_not_killed(ptr %a, i16 zeroext %b) {
entry:
br label %while.begin
while.begin: ; preds = %while.body.end, %entry
- %list.addr.i = phi %list_head* [ %ld.5, %while.body.end ], [ %a, %entry ]
- %info.i = getelementptr inbounds %list_head, %list_head* %list.addr.i, i32 0, i32 1
- %ld.0 = load %list_data*, %list_data** %info.i, align 4
- %data16.i1 = bitcast %list_data* %ld.0 to i16*
- %ld.1 = load i16, i16* %data16.i1, align 2
+ %list.addr.i = phi ptr [ %ld.5, %while.body.end ], [ %a, %entry ]
+ %info.i = getelementptr inbounds %list_head, ptr %list.addr.i, i32 0, i32 1
+ %ld.0 = load ptr, ptr %info.i, align 4
+ %data16.i1 = bitcast ptr %ld.0 to ptr
+ %ld.1 = load i16, ptr %data16.i1, align 2
%xor.1 = xor i16 %ld.1, %b
%cmp.i = icmp eq i16 %xor.1, 0
br i1 %cmp.i, label %exit, label %while.body.a
while.body.a: ; preds = %while.begin
- %next.i2 = bitcast %list_head* %list.addr.i to %list_head**
- %ld.2 = load %list_head*, %list_head** %next.i2, align 4
- %cmp.i.1 = icmp eq %list_head* %ld.2, null
+ %next.i2 = bitcast ptr %list.addr.i to ptr
+ %ld.2 = load ptr, ptr %next.i2, align 4
+ %cmp.i.1 = icmp eq ptr %ld.2, null
br i1 %cmp.i.1, label %exit, label %it.block
it.block: ; preds = %while.body.a
- %info.i.1 = getelementptr inbounds %list_head, %list_head* %ld.2, i32 0, i32 1
- %ld.3 = load %list_data*, %list_data** %info.i.1, align 4
- %data16.i.13 = bitcast %list_data* %ld.3 to i16*
- %ld.4 = load i16, i16* %data16.i.13, align 2
+ %info.i.1 = getelementptr inbounds %list_head, ptr %ld.2, i32 0, i32 1
+ %ld.3 = load ptr, ptr %info.i.1, align 4
+ %data16.i.13 = bitcast ptr %ld.3 to ptr
+ %ld.4 = load i16, ptr %data16.i.13, align 2
%xor.2 = xor i16 %ld.4, %b
%cmp.i.2 = icmp eq i16 %xor.2, 0
br i1 %cmp.i.2, label %exit, label %while.body.end
while.body.end: ; preds = %it.block
- %next.i.14 = bitcast %list_head* %ld.2 to %list_head**
- %ld.5 = load %list_head*, %list_head** %next.i.14, align 4
- %cmp.i.3 = icmp eq %list_head* %ld.5, null
+ %next.i.14 = bitcast ptr %ld.2 to ptr
+ %ld.5 = load ptr, ptr %next.i.14, align 4
+ %cmp.i.3 = icmp eq ptr %ld.5, null
br i1 %cmp.i.3, label %exit, label %while.begin
exit: ; preds = %while.body.end, %it.block, %while.body.a, %while.begin