[llvm] 6da3cfc - [Thumb2] Convert some tests to opaque pointers (NFC)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 29 19:34:04 PST 2023
Author: Sergei Barannikov
Date: 2023-01-30T06:32:32+03:00
New Revision: 6da3cfc357dc473f10169928437e113d34c7d283
URL: https://github.com/llvm/llvm-project/commit/6da3cfc357dc473f10169928437e113d34c7d283
DIFF: https://github.com/llvm/llvm-project/commit/6da3cfc357dc473f10169928437e113d34c7d283.diff
LOG: [Thumb2] Convert some tests to opaque pointers (NFC)
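
The conversion is mechanical and follows the usual opaque-pointer rewrite visible throughout the patch below: typed pointer types such as i32* become the single ptr type, pointer-to-pointer bitcasts become redundant and are deleted, overloaded intrinsic names drop the pointee type from their mangling suffix (e.g. llvm.memset.p0i8.i32 becomes llvm.memset.p0.i32), and the -opaque-pointers=0 override is removed from RUN lines now that opaque pointers are the default. As a minimal illustrative sketch (the function name and body here are hypothetical, not taken from the patch):

    ; Typed-pointer form (before the conversion):
    define void @zero4(i32* %p) {
      %q = bitcast i32* %p to i8*
      tail call void @llvm.memset.p0i8.i32(i8* align 4 %q, i8 0, i32 4, i1 false)
      ret void
    }
    declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)

    ; Opaque-pointer form (after): the bitcast disappears and the intrinsic
    ; suffix no longer encodes the pointee type.
    define void @zero4(ptr %p) {
      tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 4, i1 false)
      ret void
    }
    declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
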
Added:
Modified:
llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
llvm/test/CodeGen/Thumb2/mve-intrinsics/v2i1-upgrade.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir
index f79169a94bc3b..372fc6108129c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -opaque-pointers=0 -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
--- |
@arr = external dso_local local_unnamed_addr global [0 x i32], align 4
@@ -15,7 +15,7 @@
vector.ph: ; preds = %vector.ph.preheader, %vector.ph
%i.addr.012 = phi i32 [ %math, %vector.ph ], [ %i, %vector.ph.preheader ]
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> <i32 2, i32 2, i32 2, i32 2>, <4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*), i32 4, <4 x i1> %active.lane.mask)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> <i32 2, i32 2, i32 2, i32 2>, ptr @arr, i32 4, <4 x i1> %active.lane.mask)
%0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.addr.012, i32 1)
%math = extractvalue { i32, i1 } %0, 0
%ov = extractvalue { i32, i1 } %0, 1
@@ -26,7 +26,7 @@
}
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
- declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
+ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
...
@@ -75,7 +75,7 @@ body: |
; CHECK: liveins: $vpr, $q0, $r0, $r1
; CHECK: renamable $r0, $cpsr = tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
; CHECK: MVE_VPST 8, implicit $vpr
- ; CHECK: MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr, $noreg :: (store (s128) into `<4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*)`, align 4)
+ ; CHECK: MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr, $noreg :: (store (s128) into @arr, align 4)
; CHECK: tBcc %bb.2, 3 /* CC::lo */, killed $cpsr
; CHECK: bb.3.for.end5:
; CHECK: tBX_RET 14 /* CC::al */, $noreg
@@ -105,7 +105,7 @@ body: |
renamable $r0, $cpsr = tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
MVE_VPST 8, implicit $vpr
- MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr, $noreg :: (store (s128) into `<4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*)`, align 4)
+ MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr, $noreg :: (store (s128) into @arr, align 4)
tBcc %bb.2, 3 /* CC::lo */, killed $cpsr
bb.3.for.end5:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
index 2eaba6d0dca9c..99d169e63e5a5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -opaque-pointers=0 --arm-memtransfer-tploop=allow -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
+; RUN: llc --arm-memtransfer-tploop=allow -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
-define void @test_memcpy(i32* nocapture %x, i32* nocapture readonly %y, i32 %n, i32 %m) {
+define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
; CHECK-LABEL: test_memcpy:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
@@ -42,49 +42,46 @@ for.cond.cleanup: ; preds = %for.body, %entry
for.body: ; preds = %entry, %for.body
%i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %x.addr.010 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.09 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
- %0 = bitcast i32* %x.addr.010 to i8*
- %1 = bitcast i32* %y.addr.09 to i8*
- tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 %m, i1 false)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.010, i32 %m
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.09, i32 %m
+ %x.addr.010 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.09 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
+ tail call void @llvm.memcpy.p0.p0.i32(ptr align 4 %x.addr.010, ptr align 4 %y.addr.09, i32 %m, i1 false)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.010, i32 %m
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.09, i32 %m
%inc = add nuw nsw i32 %i.011, 1
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
-define void @test_memset(i32* nocapture %x, i32 %n, i32 %m) {
+define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) {
; CHECK-LABEL: test_memset:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: cmp r1, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r4, pc}
-; CHECK-NEXT: .LBB1_1: @ %for.body.preheader
-; CHECK-NEXT: lsl.w r12, r2, #2
+; CHECK-NEXT: poplt {r7, pc}
+; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_2: @ %for.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB1_4 Depth 2
-; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: wlstp.8 lr, r2, .LBB1_3
; CHECK-NEXT: b .LBB1_4
; CHECK-NEXT: .LBB1_3: @ %for.body
; CHECK-NEXT: @ in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: add r0, r12
+; CHECK-NEXT: add.w r0, r0, r2, lsl #2
; CHECK-NEXT: subs r1, #1
; CHECK-NEXT: beq .LBB1_5
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_4: @ Parent Loop BB1_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT: vstrb.8 q0, [r4], #16
+; CHECK-NEXT: vstrb.8 q0, [r12], #16
; CHECK-NEXT: letp lr, .LBB1_4
; CHECK-NEXT: b .LBB1_3
; CHECK-NEXT: .LBB1_5: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: pop {r7, pc}
entry:
%cmp5 = icmp sgt i32 %n, 0
br i1 %cmp5, label %for.body, label %for.cond.cleanup
@@ -94,16 +91,15 @@ for.cond.cleanup: ; preds = %for.body, %entry
for.body: ; preds = %entry, %for.body
%i.07 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %x.addr.06 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %0 = bitcast i32* %x.addr.06 to i8*
- tail call void @llvm.memset.p0i8.i32(i8* align 4 %0, i8 0, i32 %m, i1 false)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.06, i32 %m
+ %x.addr.06 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ tail call void @llvm.memset.p0.i32(ptr align 4 %x.addr.06, i8 0, i32 %m, i1 false)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.06, i32 %m
%inc = add nuw nsw i32 %i.07, 1
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
-define void @test_memmove(i32* nocapture %x, i32* nocapture readonly %y, i32 %n, i32 %m) {
+define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
; CHECK-LABEL: test_memmove:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
@@ -140,20 +136,18 @@ for.cond.cleanup: ; preds = %for.body, %entry
for.body: ; preds = %entry, %for.body
%i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %x.addr.010 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.09 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
- %0 = bitcast i32* %x.addr.010 to i8*
- %1 = bitcast i32* %y.addr.09 to i8*
- tail call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 %m, i1 false)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.010, i32 %m
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.09, i32 %m
+ %x.addr.010 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.09 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
+ tail call void @llvm.memmove.p0.p0.i32(ptr align 4 %x.addr.010, ptr align 4 %y.addr.09, i32 %m, i1 false)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.010, i32 %m
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.09, i32 %m
%inc = add nuw nsw i32 %i.011, 1
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
-define void @test_memcpy16(i32* nocapture %x, i32* nocapture readonly %y, i32 %n) {
+define void @test_memcpy16(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: test_memcpy16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
@@ -183,19 +177,17 @@ for.cond.cleanup: ; preds = %for.body, %entry
for.body: ; preds = %entry, %for.body
%i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %x.addr.08 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.07 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
- %0 = bitcast i32* %x.addr.08 to i8*
- %1 = bitcast i32* %y.addr.07 to i8*
- tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull align 4 dereferenceable(16) %0, i8* nonnull align 4 dereferenceable(16) %1, i32 16, i1 false)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.08, i32 16
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.07, i32 16
+ %x.addr.08 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.07 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
+ tail call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 dereferenceable(16) %x.addr.08, ptr nonnull align 4 dereferenceable(16) %y.addr.07, i32 16, i1 false)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.08, i32 16
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.07, i32 16
%inc = add nuw nsw i32 %i.09, 1
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
-define void @test_memset16(i32* nocapture %x, i32 %n) {
+define void @test_memset16(ptr nocapture %x, i32 %n) {
; CHECK-LABEL: test_memset16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
@@ -223,16 +215,15 @@ for.cond.cleanup: ; preds = %for.body, %entry
for.body: ; preds = %entry, %for.body
%i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %x.addr.05 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %0 = bitcast i32* %x.addr.05 to i8*
- tail call void @llvm.memset.p0i8.i32(i8* nonnull align 4 dereferenceable(16) %0, i8 0, i32 16, i1 false)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.05, i32 16
+ %x.addr.05 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ tail call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(16) %x.addr.05, i8 0, i32 16, i1 false)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.05, i32 16
%inc = add nuw nsw i32 %i.06, 1
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
-define void @test_memmove16(i32* nocapture %x, i32* nocapture readonly %y, i32 %n) {
+define void @test_memmove16(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: test_memmove16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
@@ -262,19 +253,17 @@ for.cond.cleanup: ; preds = %for.body, %entry
for.body: ; preds = %entry, %for.body
%i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %x.addr.08 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ]
- %y.addr.07 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ]
- %0 = bitcast i32* %x.addr.08 to i8*
- %1 = bitcast i32* %y.addr.07 to i8*
- tail call void @llvm.memmove.p0i8.p0i8.i32(i8* nonnull align 4 dereferenceable(16) %0, i8* nonnull align 4 dereferenceable(16) %1, i32 16, i1 false)
- %add.ptr = getelementptr inbounds i32, i32* %x.addr.08, i32 16
- %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.07, i32 16
+ %x.addr.08 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
+ %y.addr.07 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
+ tail call void @llvm.memmove.p0.p0.i32(ptr nonnull align 4 dereferenceable(16) %x.addr.08, ptr nonnull align 4 dereferenceable(16) %y.addr.07, i32 16, i1 false)
+ %add.ptr = getelementptr inbounds i32, ptr %x.addr.08, i32 16
+ %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.07, i32 16
%inc = add nuw nsw i32 %i.09, 1
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
-define void @test_memset_preheader(i8* %x, i8* %y, i32 %n) {
+define void @test_memset_preheader(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: test_memset_preheader:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
@@ -308,28 +297,28 @@ entry:
br i1 %cmp6, label %prehead, label %for.cond.cleanup
prehead:
- call void @llvm.memset.p0i8.i32(i8* %x, i8 0, i32 %n, i1 false)
+ call void @llvm.memset.p0.i32(ptr %x, i8 0, i32 %n, i1 false)
br label %for.body
for.body: ; preds = %entry, %for.body
%i.09 = phi i32 [ %inc, %for.body ], [ 0, %prehead ]
- %x.addr.08 = phi i8* [ %add.ptr, %for.body ], [ %x, %prehead ]
- %y.addr.07 = phi i8* [ %add.ptr1, %for.body ], [ %y, %prehead ]
- %add.ptr = getelementptr inbounds i8, i8* %x.addr.08, i32 1
- %add.ptr1 = getelementptr inbounds i8, i8* %y.addr.07, i32 1
- %l = load i8, i8* %x.addr.08
- store i8 %l, i8* %y.addr.07
+ %x.addr.08 = phi ptr [ %add.ptr, %for.body ], [ %x, %prehead ]
+ %y.addr.07 = phi ptr [ %add.ptr1, %for.body ], [ %y, %prehead ]
+ %add.ptr = getelementptr inbounds i8, ptr %x.addr.08, i32 1
+ %add.ptr1 = getelementptr inbounds i8, ptr %y.addr.07, i32 1
+ %l = load i8, ptr %x.addr.08
+ store i8 %l, ptr %y.addr.07
%inc = add nuw nsw i32 %i.09, 1
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
- call void @llvm.memset.p0i8.i32(i8* %x, i8 0, i32 %n, i1 false)
+ call void @llvm.memset.p0.i32(ptr %x, i8 0, i32 %n, i1 false)
ret void
}
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg)
-declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1 immarg)
+declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
+declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
+declare void @llvm.memmove.p0.p0.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1 immarg)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
index a687eac32dfce..9ef5a46edf934 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -opaque-pointers=0 -mtriple=thumbv8.1m.main -mattr=+mve --verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve --verify-machineinstrs %s -o - | FileCheck %s
%struct.arm_2d_size_t = type { i16, i16 }
-define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocapture %phwTargetBase, i16 signext %iTargetStride, %struct.arm_2d_size_t* noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) {
+define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) {
; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
@@ -37,12 +37,11 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocaptur
; CHECK-NEXT: subs r3, #8
; CHECK-NEXT: movs r4, #1
; CHECK-NEXT: vdup.16 q0, r5
-; CHECK-NEXT: lsls r1, r1, #1
+; CHECK-NEXT: vdup.16 q5, r6
; CHECK-NEXT: add.w r3, r4, r3, lsr #3
; CHECK-NEXT: vstrw.32 q0, [sp, #48] @ 16-byte Spill
; CHECK-NEXT: vmov.i16 q0, #0xf800
; CHECK-NEXT: movs r4, #0
-; CHECK-NEXT: vdup.16 q5, r6
; CHECK-NEXT: vmov.i16 q7, #0x78
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT: vstrw.32 q2, [sp, #32] @ 16-byte Spill
@@ -94,7 +93,7 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocaptur
; CHECK-NEXT: @ %bb.4: @ %for.cond3.for.cond.cleanup7_crit_edge.us
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: adds r4, #1
-; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: add.w r0, r0, r1, lsl #1
; CHECK-NEXT: cmp r4, r12
; CHECK-NEXT: bne .LBB0_2
; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup
@@ -103,8 +102,8 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocaptur
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
- %iHeight = getelementptr inbounds %struct.arm_2d_size_t, %struct.arm_2d_size_t* %ptCopySize, i32 0, i32 1
- %0 = load i16, i16* %iHeight, align 2
+ %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1
+ %0 = load i16, ptr %iHeight, align 2
%conv1 = sext i16 %0 to i32
%and.i = shl i16 %hwColour, 3
%shl.i = and i16 %and.i, 248
@@ -119,8 +118,7 @@ entry:
br i1 %cmp61, label %for.cond3.preheader.lr.ph, label %for.cond.cleanup
for.cond3.preheader.lr.ph: ; preds = %entry
- %iWidth = getelementptr inbounds %struct.arm_2d_size_t, %struct.arm_2d_size_t* %ptCopySize, i32 0, i32 0
- %6 = load i16, i16* %iWidth, align 2
+ %6 = load i16, ptr %ptCopySize, align 2
%conv4 = sext i16 %6 to i32
%cmp558 = icmp sgt i16 %6, 0
br i1 %cmp558, label %for.cond3.preheader.us.preheader, label %for.cond.cleanup
@@ -142,42 +140,40 @@ for.cond3.preheader.us.preheader: ; preds = %for.cond3.preheader
br label %vector.ph
vector.ph: ; preds = %for.cond3.for.cond.cleanup7_crit_edge.us, %for.cond3.preheader.us.preheader
- %phwTargetBase.addr.063.us = phi i16* [ %add.ptr.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ %phwTargetBase, %for.cond3.preheader.us.preheader ]
+ %phwTargetBase.addr.063.us = phi ptr [ %add.ptr.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ %phwTargetBase, %for.cond3.preheader.us.preheader ]
%y.062.us = phi i32 [ %inc32.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ]
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %next.gep = getelementptr i16, i16* %phwTargetBase.addr.063.us, i32 %index
+ %next.gep = getelementptr i16, ptr %phwTargetBase.addr.063.us, i32 %index
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %conv4)
- %7 = bitcast i16* %next.gep to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %7, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison)
- %8 = shl <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- %9 = and <8 x i16> %8, <i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248>
- %10 = lshr <8 x i16> %wide.masked.load, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
- %11 = and <8 x i16> %10, <i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120>
- %12 = lshr <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- %13 = and <8 x i16> %12, <i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252>
- %14 = mul <8 x i16> %9, %broadcast.splat76
- %15 = add <8 x i16> %14, %broadcast.splat78
- %16 = lshr <8 x i16> %15, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
- %17 = mul <8 x i16> %13, %broadcast.splat76
- %18 = add <8 x i16> %17, %broadcast.splat80
- %19 = mul <8 x i16> %11, %broadcast.splat76
- %20 = add <8 x i16> %19, %broadcast.splat82
- %21 = lshr <8 x i16> %18, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
- %22 = and <8 x i16> %21, <i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016>
- %23 = or <8 x i16> %22, %16
- %24 = and <8 x i16> %20, <i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048>
- %25 = or <8 x i16> %23, %24
- %26 = bitcast i16* %next.gep to <8 x i16>*
- call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %25, <8 x i16>* %26, i32 2, <8 x i1> %active.lane.mask)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %next.gep, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison)
+ %7 = shl <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ %8 = and <8 x i16> %7, <i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248>
+ %9 = lshr <8 x i16> %wide.masked.load, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+ %10 = and <8 x i16> %9, <i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120>
+ %11 = lshr <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ %12 = and <8 x i16> %11, <i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252>
+ %13 = mul <8 x i16> %8, %broadcast.splat76
+ %14 = add <8 x i16> %13, %broadcast.splat78
+ %15 = lshr <8 x i16> %14, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+ %16 = mul <8 x i16> %12, %broadcast.splat76
+ %17 = add <8 x i16> %16, %broadcast.splat80
+ %18 = mul <8 x i16> %10, %broadcast.splat76
+ %19 = add <8 x i16> %18, %broadcast.splat82
+ %20 = lshr <8 x i16> %17, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+ %21 = and <8 x i16> %20, <i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016>
+ %22 = or <8 x i16> %21, %15
+ %23 = and <8 x i16> %19, <i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048>
+ %24 = or <8 x i16> %22, %23
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> %24, ptr %next.gep, i32 2, <8 x i1> %active.lane.mask)
%index.next = add i32 %index, 8
- %27 = icmp eq i32 %index.next, %n.vec
- br i1 %27, label %for.cond3.for.cond.cleanup7_crit_edge.us, label %vector.body
+ %25 = icmp eq i32 %index.next, %n.vec
+ br i1 %25, label %for.cond3.for.cond.cleanup7_crit_edge.us, label %vector.body
for.cond3.for.cond.cleanup7_crit_edge.us: ; preds = %vector.body
- %add.ptr.us = getelementptr inbounds i16, i16* %phwTargetBase.addr.063.us, i32 %conv30
+ %add.ptr.us = getelementptr inbounds i16, ptr %phwTargetBase.addr.063.us, i32 %conv30
%inc32.us = add nuw nsw i32 %y.062.us, 1
%exitcond66.not = icmp eq i32 %inc32.us, %conv1
br i1 %exitcond66.not, label %for.cond.cleanup, label %vector.ph
@@ -185,7 +181,7 @@ for.cond3.for.cond.cleanup7_crit_edge.us: ; preds = %vector.body
for.cond.cleanup: ; preds = %for.cond3.for.cond.cleanup7_crit_edge.us, %for.cond3.preheader.lr.ph, %entry
ret void
}
-define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias nocapture %phwTargetBase, i16 signext %iTargetStride, %struct.arm_2d_size_t* noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) "target-cpu"="cortex-m55" {
+define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) "target-cpu"="cortex-m55" {
; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha_sched:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
@@ -219,7 +215,6 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no
; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vdup.16 q0, r5
; CHECK-NEXT: rsb.w r3, r7, #256
-; CHECK-NEXT: lsls r7, r1, #1
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
; CHECK-NEXT: vdup.16 q0, r6
; CHECK-NEXT: vmov.i16 q2, #0xf8
@@ -266,8 +261,8 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no
; CHECK-NEXT: letp lr, .LBB1_4
; CHECK-NEXT: @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us
; CHECK-NEXT: @ in Loop: Header=BB1_3 Depth=1
+; CHECK-NEXT: add.w r0, r0, r1, lsl #1
; CHECK-NEXT: adds r4, #1
-; CHECK-NEXT: add r0, r7
; CHECK-NEXT: cmp r4, r12
; CHECK-NEXT: bne .LBB1_3
; CHECK-NEXT: .LBB1_6: @ %for.cond.cleanup
@@ -276,8 +271,8 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
- %iHeight = getelementptr inbounds %struct.arm_2d_size_t, %struct.arm_2d_size_t* %ptCopySize, i32 0, i32 1
- %0 = load i16, i16* %iHeight, align 2
+ %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1
+ %0 = load i16, ptr %iHeight, align 2
%conv1 = sext i16 %0 to i32
%and.i = shl i16 %hwColour, 3
%shl.i = and i16 %and.i, 248
@@ -292,8 +287,7 @@ entry:
br i1 %cmp61, label %for.cond3.preheader.lr.ph, label %for.cond.cleanup
for.cond3.preheader.lr.ph: ; preds = %entry
- %iWidth = getelementptr inbounds %struct.arm_2d_size_t, %struct.arm_2d_size_t* %ptCopySize, i32 0, i32 0
- %6 = load i16, i16* %iWidth, align 2
+ %6 = load i16, ptr %ptCopySize, align 2
%conv4 = sext i16 %6 to i32
%cmp558 = icmp sgt i16 %6, 0
br i1 %cmp558, label %for.cond3.preheader.us.preheader, label %for.cond.cleanup
@@ -315,42 +309,40 @@ for.cond3.preheader.us.preheader: ; preds = %for.cond3.preheader
br label %vector.ph
vector.ph: ; preds = %for.cond3.for.cond.cleanup7_crit_edge.us, %for.cond3.preheader.us.preheader
- %phwTargetBase.addr.063.us = phi i16* [ %add.ptr.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ %phwTargetBase, %for.cond3.preheader.us.preheader ]
+ %phwTargetBase.addr.063.us = phi ptr [ %add.ptr.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ %phwTargetBase, %for.cond3.preheader.us.preheader ]
%y.062.us = phi i32 [ %inc32.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ]
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %next.gep = getelementptr i16, i16* %phwTargetBase.addr.063.us, i32 %index
+ %next.gep = getelementptr i16, ptr %phwTargetBase.addr.063.us, i32 %index
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %conv4)
- %7 = bitcast i16* %next.gep to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %7, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison)
- %8 = shl <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- %9 = and <8 x i16> %8, <i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248>
- %10 = lshr <8 x i16> %wide.masked.load, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
- %11 = and <8 x i16> %10, <i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120>
- %12 = lshr <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- %13 = and <8 x i16> %12, <i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252>
- %14 = mul <8 x i16> %9, %broadcast.splat76
- %15 = add <8 x i16> %14, %broadcast.splat78
- %16 = lshr <8 x i16> %15, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
- %17 = mul <8 x i16> %13, %broadcast.splat76
- %18 = add <8 x i16> %17, %broadcast.splat80
- %19 = mul <8 x i16> %11, %broadcast.splat76
- %20 = add <8 x i16> %19, %broadcast.splat82
- %21 = lshr <8 x i16> %18, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
- %22 = and <8 x i16> %21, <i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016>
- %23 = or <8 x i16> %22, %16
- %24 = and <8 x i16> %20, <i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048>
- %25 = or <8 x i16> %23, %24
- %26 = bitcast i16* %next.gep to <8 x i16>*
- call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %25, <8 x i16>* %26, i32 2, <8 x i1> %active.lane.mask)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %next.gep, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison)
+ %7 = shl <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ %8 = and <8 x i16> %7, <i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248>
+ %9 = lshr <8 x i16> %wide.masked.load, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+ %10 = and <8 x i16> %9, <i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120>
+ %11 = lshr <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ %12 = and <8 x i16> %11, <i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252>
+ %13 = mul <8 x i16> %8, %broadcast.splat76
+ %14 = add <8 x i16> %13, %broadcast.splat78
+ %15 = lshr <8 x i16> %14, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+ %16 = mul <8 x i16> %12, %broadcast.splat76
+ %17 = add <8 x i16> %16, %broadcast.splat80
+ %18 = mul <8 x i16> %10, %broadcast.splat76
+ %19 = add <8 x i16> %18, %broadcast.splat82
+ %20 = lshr <8 x i16> %17, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+ %21 = and <8 x i16> %20, <i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016>
+ %22 = or <8 x i16> %21, %15
+ %23 = and <8 x i16> %19, <i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048>
+ %24 = or <8 x i16> %22, %23
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> %24, ptr %next.gep, i32 2, <8 x i1> %active.lane.mask)
%index.next = add i32 %index, 8
- %27 = icmp eq i32 %index.next, %n.vec
- br i1 %27, label %for.cond3.for.cond.cleanup7_crit_edge.us, label %vector.body
+ %25 = icmp eq i32 %index.next, %n.vec
+ br i1 %25, label %for.cond3.for.cond.cleanup7_crit_edge.us, label %vector.body
for.cond3.for.cond.cleanup7_crit_edge.us: ; preds = %vector.body
- %add.ptr.us = getelementptr inbounds i16, i16* %phwTargetBase.addr.063.us, i32 %conv30
+ %add.ptr.us = getelementptr inbounds i16, ptr %phwTargetBase.addr.063.us, i32 %conv30
%inc32.us = add nuw nsw i32 %y.062.us, 1
%exitcond66.not = icmp eq i32 %inc32.us, %conv1
br i1 %exitcond66.not, label %for.cond.cleanup, label %vector.ph
@@ -360,5 +352,5 @@ for.cond.cleanup: ; preds = %for.cond3.for.cond.
}
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) #1
-declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) #2
-declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) #3
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) #2
+declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) #3
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
index c626b3b4d12fe..fa6a66b95f654 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
@@ -1,4 +1,4 @@
-; RUN: opt -opaque-pointers=0 -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s
; CHECK-LABEL: reduction_i32
; CHECK: phi i32 [ 0, %vector.ph ]
@@ -7,9 +7,9 @@
; CHECK: [[PHI:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[ELEMS:%[^ ]+]], %vector.body ]
; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[PHI]])
; CHECK: [[ELEMS]] = sub i32 [[PHI]], 8
-; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
-; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp6, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
-define i16 @reduction_i32(i16* nocapture readonly %A, i16* nocapture readonly %B, i32 %N) {
+; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp2, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
+; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp5, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
+define i16 @reduction_i32(ptr nocapture readonly %A, ptr nocapture readonly %B, i32 %N) {
entry:
%cmp8 = icmp eq i32 %N, 0
br i1 %cmp8, label %for.cond.cleanup, label %vector.ph
@@ -28,13 +28,11 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph], [ %index.next, %vector.body ]
%vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph], [ %tmp8, %vector.body ]
%3 = phi i32 [ %start, %vector.ph], [ %4, %vector.body ]
- %tmp2 = getelementptr inbounds i16, i16* %A, i32 %index
+ %tmp2 = getelementptr inbounds i16, ptr %A, i32 %index
%tmp3 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N)
- %tmp4 = bitcast i16* %tmp2 to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> %tmp3, <8 x i16> undef)
- %tmp5 = getelementptr inbounds i16, i16* %B, i32 %index
- %tmp6 = bitcast i16* %tmp5 to <8 x i16>*
- %wide.masked.load3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp6, i32 4, <8 x i1> %tmp3, <8 x i16> undef)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp2, i32 4, <8 x i1> %tmp3, <8 x i16> undef)
+ %tmp5 = getelementptr inbounds i16, ptr %B, i32 %index
+ %wide.masked.load3 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp5, i32 4, <8 x i1> %tmp3, <8 x i16> undef)
%tmp7 = add <8 x i16> %wide.masked.load, %vec.phi
%tmp8 = add <8 x i16> %tmp7, %wide.masked.load3
%index.next = add i32 %index, 8
@@ -69,8 +67,8 @@ for.cond.cleanup:
; CHECK: [[PHI:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[ELEMS:%[^ ]+]], %vector.body ]
; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[PHI]])
; CHECK: [[ELEMS]] = sub i32 [[PHI]], 8
-; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
-define i16 @reduction_i32_with_scalar(i16* nocapture readonly %A, i16 %B, i32 %N) local_unnamed_addr {
+; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp2, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
+define i16 @reduction_i32_with_scalar(ptr nocapture readonly %A, i16 %B, i32 %N) local_unnamed_addr {
entry:
%cmp8 = icmp eq i32 %N, 0
br i1 %cmp8, label %for.cond.cleanup, label %vector.ph
@@ -91,10 +89,9 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph], [ %index.next, %vector.body ]
%vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph], [ %tmp6, %vector.body ]
%3 = phi i32 [ %start, %vector.ph], [ %4, %vector.body ]
- %tmp2 = getelementptr inbounds i16, i16* %A, i32 %index
+ %tmp2 = getelementptr inbounds i16, ptr %A, i32 %index
%tmp3 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N)
- %tmp4 = bitcast i16* %tmp2 to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> %tmp3, <8 x i16> undef)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp2, i32 4, <8 x i1> %tmp3, <8 x i16> undef)
%tmp5 = add <8 x i16> %vec.phi, %broadcast.splat4
%tmp6 = add <8 x i16> %tmp5, %wide.masked.load
%index.next = add nuw nsw i32 %index, 8
@@ -130,7 +127,7 @@ for.cond.cleanup:
; CHECK-NOT: @llvm.get.active.lane.mask.v8i1.i32
; CHECK: ret
;
-define i16 @reduction_not_guarded(i16* nocapture readonly %A, i16 %B, i32 %N) local_unnamed_addr {
+define i16 @reduction_not_guarded(ptr nocapture readonly %A, i16 %B, i32 %N) local_unnamed_addr {
entry:
%tmp = add i32 %N, -1
%n.rnd.up = add nuw nsw i32 %tmp, 8
@@ -147,10 +144,9 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %entry], [ %index.next, %vector.body ]
%vec.phi = phi <8 x i16> [ zeroinitializer, %entry], [ %tmp6, %vector.body ]
%3 = phi i32 [ %start, %entry ], [ %4, %vector.body ]
- %tmp2 = getelementptr inbounds i16, i16* %A, i32 %index
+ %tmp2 = getelementptr inbounds i16, ptr %A, i32 %index
%tmp3 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N)
- %tmp4 = bitcast i16* %tmp2 to <8 x i16>*
- %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> %tmp3, <8 x i16> undef)
+ %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp2, i32 4, <8 x i1> %tmp3, <8 x i16> undef)
%tmp5 = add <8 x i16> %vec.phi, %broadcast.splat4
%tmp6 = add <8 x i16> %tmp5, %wide.masked.load
%index.next = add nuw nsw i32 %index, 8
@@ -175,7 +171,7 @@ middle.block: ; preds = %vector.body
; CHECK: @llvm.arm.mve.vctp
; CHECK-NOT: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask
;
-define dso_local void @Correlation(i16* nocapture readonly %Input, i16* nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr #0 {
+define dso_local void @Correlation(ptr nocapture readonly %Input, ptr nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr #0 {
entry:
%conv = sext i16 %N to i32
%cmp36 = icmp sgt i16 %N, 0
@@ -189,7 +185,7 @@ for.body.lr.ph:
for.body:
%lsr.iv51 = phi i32 [ %lsr.iv.next, %for.end ], [ %0, %for.body.lr.ph ]
- %lsr.iv46 = phi i16* [ %scevgep47, %for.end ], [ %Input, %for.body.lr.ph ]
+ %lsr.iv46 = phi ptr [ %scevgep47, %for.end ], [ %Input, %for.body.lr.ph ]
%i.037 = phi i32 [ 0, %for.body.lr.ph ], [ %inc16, %for.end ]
%1 = mul nsw i32 %i.037, -1
%2 = add i32 %0, %1
@@ -207,17 +203,15 @@ vector.ph: ; preds = %for.body
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
- %lsr.iv48 = phi i16* [ %scevgep49, %vector.body ], [ %lsr.iv46, %vector.ph ]
- %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %Input, %vector.ph ]
+ %lsr.iv48 = phi ptr [ %scevgep49, %vector.body ], [ %lsr.iv46, %vector.ph ]
+ %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %Input, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %16, %vector.body ]
%9 = phi i32 [ %start, %vector.ph ], [ %17, %vector.body ]
- %lsr.iv4850 = bitcast i16* %lsr.iv48 to <4 x i16>*
- %lsr.iv45 = bitcast i16* %lsr.iv to <4 x i16>*
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %8)
- %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv45, i32 2, <4 x i1> %active.lane.mask, <4 x i16> undef)
+ %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv, i32 2, <4 x i1> %active.lane.mask, <4 x i16> undef)
%10 = sext <4 x i16> %wide.masked.load to <4 x i32>
- %wide.masked.load42 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv4850, i32 2, <4 x i1> %active.lane.mask, <4 x i16> undef)
+ %wide.masked.load42 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv48, i32 2, <4 x i1> %active.lane.mask, <4 x i16> undef)
%11 = sext <4 x i16> %wide.masked.load42 to <4 x i32>
%12 = mul nsw <4 x i32> %11, %10
%13 = insertelement <4 x i32> undef, i32 %conv1032, i32 0
@@ -225,8 +219,8 @@ vector.body: ; preds = %vector.body, %vecto
%15 = ashr <4 x i32> %12, %14
%16 = add <4 x i32> %15, %vec.phi
%index.next = add i32 %index, 4
- %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
- %scevgep49 = getelementptr i16, i16* %lsr.iv48, i32 4
+ %scevgep = getelementptr i16, ptr %lsr.iv, i32 4
+ %scevgep49 = getelementptr i16, ptr %lsr.iv48, i32 4
%17 = call i32 @llvm.loop.decrement.reg.i32(i32 %9, i32 1)
%18 = icmp ne i32 %17, 0
br i1 %18, label %vector.body, label %middle.block
@@ -240,10 +234,10 @@ for.end: ; preds = %middle.block, %for.
%Sum.0.lcssa = phi i32 [ 0, %for.body ], [ %20, %middle.block ]
%21 = lshr i32 %Sum.0.lcssa, 16
%conv13 = trunc i32 %21 to i16
- %arrayidx14 = getelementptr inbounds i16, i16* %Output, i32 %i.037
- store i16 %conv13, i16* %arrayidx14, align 2
+ %arrayidx14 = getelementptr inbounds i16, ptr %Output, i32 %i.037
+ store i16 %conv13, ptr %arrayidx14, align 2
%inc16 = add nuw nsw i32 %i.037, 1
- %scevgep47 = getelementptr i16, i16* %lsr.iv46, i32 1
+ %scevgep47 = getelementptr i16, ptr %lsr.iv46, i32 1
%lsr.iv.next = add i32 %lsr.iv51, -1
%exitcond39 = icmp eq i32 %inc16, %conv
br i1 %exitcond39, label %for.end17, label %for.body
@@ -252,11 +246,11 @@ for.end17: ; preds = %for.end, %entry
ret void
}
-declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
-declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
+declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/v2i1-upgrade.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/v2i1-upgrade.ll
index a5d4830f5e62f..75595e6e5876b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/v2i1-upgrade.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/v2i1-upgrade.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -opaque-pointers=0 -S -o - %s | FileCheck %s
+; RUN: opt -S -o - %s | FileCheck %s
declare <4 x i1> @llvm.arm.mve.vctp64(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
@@ -9,10 +9,10 @@ declare <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32>, <
declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
-declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64*, <2 x i64>, i32, i32, i32, <4 x i1>)
+declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1(ptr, <2 x i64>, i32, i32, i32, <4 x i1>)
declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
-declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64*, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>)
+declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1(ptr, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>)
@@ -112,45 +112,43 @@ entry:
ret <2 x i64> %2
}
-define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(<2 x i64>* %addr, i16 zeroext %p) {
+define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_base_wb_z_s64(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[ADDR:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 664, <2 x i1> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 1
-; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[ADDR]], align 8
+; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr [[ADDR]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 0
; CHECK-NEXT: ret <2 x i64> [[TMP7]]
;
entry:
- %0 = load <2 x i64>, <2 x i64>* %addr, align 8
+ %0 = load <2 x i64>, ptr %addr, align 8
%1 = zext i16 %p to i32
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
%3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2)
%4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
- store <2 x i64> %4, <2 x i64>* %addr, align 8
+ store <2 x i64> %4, ptr %addr, align 8
%5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
ret <2 x i64> %5
}
-define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
+define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_offset_z_s64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
-; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v2i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0, <2 x i1> [[TMP3]])
-; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1(ptr [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0, <4 x i1> [[TMP1]])
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
entry:
%0 = zext i16 %p to i32
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
- %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1)
+ %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1)
ret <2 x i64> %2
}
@@ -171,41 +169,39 @@ entry:
ret void
}
-define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) {
+define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_s64(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[ADDR:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 248, <2 x i64> [[VALUE:%.*]], <2 x i1> [[TMP4]])
-; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[ADDR]], align 8
+; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[ADDR]], align 8
; CHECK-NEXT: ret void
;
entry:
- %0 = load <2 x i64>, <2 x i64>* %addr, align 8
+ %0 = load <2 x i64>, ptr %addr, align 8
%1 = zext i16 %p to i32
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
%3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2)
- store <2 x i64> %3, <2 x i64>* %addr, align 8
+ store <2 x i64> %3, ptr %addr, align 8
ret void
}
-define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
+define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_offset_p_s64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
-; CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v2i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <2 x i1> [[TMP3]])
+; CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1(ptr [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <4 x i1> [[TMP1]])
; CHECK-NEXT: ret void
;
entry:
%0 = zext i16 %p to i32
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
- call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
+ call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
ret void
}