[llvm] 7a3fb6a - AMDGPU: Convert some memcpy tests to opaque pointers
Matt Arsenault via llvm-commits
llvm-commits@lists.llvm.org
Mon Nov 28 11:12:01 PST 2022
Author: Matt Arsenault
Date: 2022-11-28T14:11:56-05:00
New Revision: 7a3fb6a6e3daebf11d85f62c5bd6c2800831a8fb
URL: https://github.com/llvm/llvm-project/commit/7a3fb6a6e3daebf11d85f62c5bd6c2800831a8fb
DIFF: https://github.com/llvm/llvm-project/commit/7a3fb6a6e3daebf11d85f62c5bd6c2800831a8fb.diff
LOG: AMDGPU: Convert some memcpy tests to opaque pointers
memcpy-scoped-aa.ll required manually updating the IR references in
the MMOs.
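For context, the opaque pointer conversion replaces typed pointer
arguments such as i8 addrspace(1)* with ptr addrspace(1), and drops
the pointee types from the intrinsic name mangling (p1i8 becomes p1).
A representative before/after pair, taken from the
lower-mem-intrinsics-threshold.ll diff below:

; Typed pointers (old form):
declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture writeonly, i8, i64, i1 immarg)

; Opaque pointers (new form):
declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg)

Because the conversion also deletes the now-redundant bitcasts, the IR
value names referenced from the MachineMemOperands change (for example,
"from %ir.p1" becomes "from %ir.add.ptr" in memcpy-scoped-aa.ll), which
is the manual update mentioned above.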
Added:
Modified:
llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
llvm/test/CodeGen/AMDGPU/mem-builtins.ll
llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll
llvm/test/CodeGen/AMDGPU/memcpy-inline-fails.ll
llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
index a83715d90a38..cd720e93a48f 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
@@ -7,50 +7,50 @@
; Test the -amdgpu-mem-intrinsic-expand-size flag works.
; Make sure we can always eliminate the intrinsic, even at 0.
-define amdgpu_kernel void @memset_size_0(i8 addrspace(1)* %dst, i8 %val) {
+define amdgpu_kernel void @memset_size_0(ptr addrspace(1) %dst, i8 %val) {
; OPT8-LABEL: @memset_size_0(
-; OPT8-NEXT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 0, i1 false)
+; OPT8-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) [[DST:%.*]], i8 [[VAL:%.*]], i64 0, i1 false)
; OPT8-NEXT: ret void
;
; OPT4-LABEL: @memset_size_0(
-; OPT4-NEXT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 0, i1 false)
+; OPT4-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) [[DST:%.*]], i8 [[VAL:%.*]], i64 0, i1 false)
; OPT4-NEXT: ret void
;
; OPT0-LABEL: @memset_size_0(
-; OPT0-NEXT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 0, i1 false)
+; OPT0-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) [[DST:%.*]], i8 [[VAL:%.*]], i64 0, i1 false)
; OPT0-NEXT: ret void
;
; OPT_NEG-LABEL: @memset_size_0(
; OPT_NEG-NEXT: br i1 true, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
; OPT_NEG: loadstoreloop:
; OPT_NEG-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
-; OPT_NEG-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
-; OPT_NEG-NEXT: store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT_NEG-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[TMP1]]
+; OPT_NEG-NEXT: store i8 [[VAL:%.*]], ptr addrspace(1) [[TMP2]], align 1
; OPT_NEG-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
; OPT_NEG-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 0
; OPT_NEG-NEXT: br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
; OPT_NEG: split:
; OPT_NEG-NEXT: ret void
;
- call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 0, i1 false)
+ call void @llvm.memset.p1.i64(ptr addrspace(1) %dst, i8 %val, i64 0, i1 false)
ret void
}
-define amdgpu_kernel void @memset_size_4(i8 addrspace(1)* %dst, i8 %val) {
+define amdgpu_kernel void @memset_size_4(ptr addrspace(1) %dst, i8 %val) {
; OPT8-LABEL: @memset_size_4(
-; OPT8-NEXT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 4, i1 false)
+; OPT8-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) [[DST:%.*]], i8 [[VAL:%.*]], i64 4, i1 false)
; OPT8-NEXT: ret void
;
; OPT4-LABEL: @memset_size_4(
-; OPT4-NEXT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 4, i1 false)
+; OPT4-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) [[DST:%.*]], i8 [[VAL:%.*]], i64 4, i1 false)
; OPT4-NEXT: ret void
;
; OPT0-LABEL: @memset_size_4(
; OPT0-NEXT: br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
; OPT0: loadstoreloop:
; OPT0-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
-; OPT0-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
-; OPT0-NEXT: store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT0-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[TMP1]]
+; OPT0-NEXT: store i8 [[VAL:%.*]], ptr addrspace(1) [[TMP2]], align 1
; OPT0-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
; OPT0-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 4
; OPT0-NEXT: br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
@@ -61,29 +61,29 @@ define amdgpu_kernel void @memset_size_4(i8 addrspace(1)* %dst, i8 %val) {
; OPT_NEG-NEXT: br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
; OPT_NEG: loadstoreloop:
; OPT_NEG-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
-; OPT_NEG-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
-; OPT_NEG-NEXT: store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT_NEG-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[TMP1]]
+; OPT_NEG-NEXT: store i8 [[VAL:%.*]], ptr addrspace(1) [[TMP2]], align 1
; OPT_NEG-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
; OPT_NEG-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 4
; OPT_NEG-NEXT: br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
; OPT_NEG: split:
; OPT_NEG-NEXT: ret void
;
- call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 4, i1 false)
+ call void @llvm.memset.p1.i64(ptr addrspace(1) %dst, i8 %val, i64 4, i1 false)
ret void
}
-define amdgpu_kernel void @memset_size_8(i8 addrspace(1)* %dst, i8 %val) {
+define amdgpu_kernel void @memset_size_8(ptr addrspace(1) %dst, i8 %val) {
; OPT8-LABEL: @memset_size_8(
-; OPT8-NEXT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 8, i1 false)
+; OPT8-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) [[DST:%.*]], i8 [[VAL:%.*]], i64 8, i1 false)
; OPT8-NEXT: ret void
;
; OPT4-LABEL: @memset_size_8(
; OPT4-NEXT: br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
; OPT4: loadstoreloop:
; OPT4-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
-; OPT4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
-; OPT4-NEXT: store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[TMP1]]
+; OPT4-NEXT: store i8 [[VAL:%.*]], ptr addrspace(1) [[TMP2]], align 1
; OPT4-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
; OPT4-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 8
; OPT4-NEXT: br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
@@ -94,8 +94,8 @@ define amdgpu_kernel void @memset_size_8(i8 addrspace(1)* %dst, i8 %val) {
; OPT0-NEXT: br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
; OPT0: loadstoreloop:
; OPT0-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
-; OPT0-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
-; OPT0-NEXT: store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT0-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[TMP1]]
+; OPT0-NEXT: store i8 [[VAL:%.*]], ptr addrspace(1) [[TMP2]], align 1
; OPT0-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
; OPT0-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 8
; OPT0-NEXT: br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
@@ -106,18 +106,18 @@ define amdgpu_kernel void @memset_size_8(i8 addrspace(1)* %dst, i8 %val) {
; OPT_NEG-NEXT: br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
; OPT_NEG: loadstoreloop:
; OPT_NEG-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
-; OPT_NEG-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
-; OPT_NEG-NEXT: store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT_NEG-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[TMP1]]
+; OPT_NEG-NEXT: store i8 [[VAL:%.*]], ptr addrspace(1) [[TMP2]], align 1
; OPT_NEG-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
; OPT_NEG-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 8
; OPT_NEG-NEXT: br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
; OPT_NEG: split:
; OPT_NEG-NEXT: ret void
;
- call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 8, i1 false)
+ call void @llvm.memset.p1.i64(ptr addrspace(1) %dst, i8 %val, i64 8, i1 false)
ret void
}
-declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture writeonly, i8, i64, i1 immarg) #0
+declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg) #0
attributes #0 = { argmemonly nounwind willreturn writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
index 01c5c45f7973..d9891228e6e2 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
@@ -2,17 +2,17 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=1024 %s | FileCheck -check-prefixes=OPT,MAX1024 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=-1 %s | FileCheck -check-prefixes=OPT,ALL %s
-declare void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1
-declare void @llvm.memcpy.p1i8.p3i8.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
-declare void @llvm.memcpy.p3i8.p1i8.i32(ptr addrspace(3) nocapture, ptr addrspace(1) nocapture readonly, i32, i1) #1
-declare void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
-declare void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
+declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1
+declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
+declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) nocapture, ptr addrspace(1) nocapture readonly, i32, i1) #1
+declare void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
+declare void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p1i8.p1i8.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1
-declare void @llvm.memmove.p1i8.p3i8.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p5i8.p5i8.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
+declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1
+declare void @llvm.memmove.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
+declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
-declare void @llvm.memset.p1i8.i64(ptr addrspace(1) nocapture, i8, i64, i1) #1
+declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture, i8, i64, i1) #1
; Test the upper bound for sizes to leave
define amdgpu_kernel void @max_size_small_static_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
@@ -34,7 +34,7 @@ define amdgpu_kernel void @max_size_small_static_memcpy_caller0(ptr addrspace(1)
; ALL: memcpy-split:
; ALL-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1024, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1024, i1 false)
ret void
}
@@ -58,7 +58,7 @@ define amdgpu_kernel void @min_size_large_static_memcpy_caller0(ptr addrspace(1)
; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 1
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1025, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1025, i1 false)
ret void
}
@@ -96,7 +96,7 @@ define amdgpu_kernel void @max_size_small_static_memmove_caller0(ptr addrspace(1
; ALL: memmove_done:
; ALL-NEXT: ret void
;
- call void @llvm.memmove.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1024, i1 false)
+ call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1024, i1 false)
ret void
}
@@ -130,7 +130,7 @@ define amdgpu_kernel void @min_size_large_static_memmove_caller0(ptr addrspace(1
; OPT: memmove_done:
; OPT-NEXT: ret void
;
- call void @llvm.memmove.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1025, i1 false)
+ call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1025, i1 false)
ret void
}
@@ -151,7 +151,7 @@ define amdgpu_kernel void @max_size_small_static_memset_caller0(ptr addrspace(1)
; ALL: split:
; ALL-NEXT: ret void
;
- call void @llvm.memset.p1i8.i64(ptr addrspace(1) %dst, i8 %val, i64 1024, i1 false)
+ call void @llvm.memset.p1.i64(ptr addrspace(1) %dst, i8 %val, i64 1024, i1 false)
ret void
}
@@ -168,7 +168,7 @@ define amdgpu_kernel void @min_size_large_static_memset_caller0(ptr addrspace(1)
; OPT: split:
; OPT-NEXT: ret void
;
- call void @llvm.memset.p1i8.i64(ptr addrspace(1) %dst, i8 %val, i64 1025, i1 false)
+ call void @llvm.memset.p1.i64(ptr addrspace(1) %dst, i8 %val, i64 1025, i1 false)
ret void
}
@@ -204,7 +204,7 @@ define amdgpu_kernel void @variable_memcpy_caller0(ptr addrspace(1) %dst, ptr ad
; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n, i1 false)
ret void
}
@@ -240,7 +240,7 @@ define amdgpu_kernel void @variable_memcpy_caller1(ptr addrspace(1) %dst, ptr ad
; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n, i1 false)
ret void
}
@@ -250,62 +250,62 @@ define amdgpu_kernel void @memcpy_multi_use_one_function(ptr addrspace(1) %dst0,
; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16
; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]]
; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
-; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION2:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5:%.*]]
+; OPT: loop-memcpy-expansion2:
+; OPT-NEXT: [[LOOP_INDEX3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION2]] ]
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX3]]
; OPT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST0:%.*]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST0:%.*]], i64 [[LOOP_INDEX3]]
; OPT-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX3]], 1
; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
-; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION2]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5]]
+; OPT: loop-memcpy-residual4:
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX6:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER5]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL4:%.*]] ]
+; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX6]]
; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]]
; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1
; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST0]], i64 [[TMP10]]
; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1
-; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX6]], 1
; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
-; OPT: post-loop-memcpy-expansion:
+; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1:%.*]]
+; OPT: post-loop-memcpy-expansion1:
; OPT-NEXT: [[TMP16:%.*]] = udiv i64 [[M:%.*]], 16
; OPT-NEXT: [[TMP17:%.*]] = urem i64 [[M]], 16
; OPT-NEXT: [[TMP18:%.*]] = sub i64 [[M]], [[TMP17]]
; OPT-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP16]], 0
-; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_EXPANSION2:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5:%.*]]
-; OPT: loop-memcpy-expansion2:
-; OPT-NEXT: [[LOOP_INDEX3:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP23:%.*]], [[LOOP_MEMCPY_EXPANSION2]] ]
-; OPT-NEXT: [[TMP20:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC]], i64 [[LOOP_INDEX3]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT: loop-memcpy-expansion:
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION1]] ], [ [[TMP23:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP20:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC]], i64 [[LOOP_INDEX]]
; OPT-NEXT: [[TMP21:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP20]], align 1
-; OPT-NEXT: [[TMP22:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST1:%.*]], i64 [[LOOP_INDEX3]]
+; OPT-NEXT: [[TMP22:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST1:%.*]], i64 [[LOOP_INDEX]]
; OPT-NEXT: store <4 x i32> [[TMP21]], ptr addrspace(1) [[TMP22]], align 1
-; OPT-NEXT: [[TMP23]] = add i64 [[LOOP_INDEX3]], 1
+; OPT-NEXT: [[TMP23]] = add i64 [[LOOP_INDEX]], 1
; OPT-NEXT: [[TMP24:%.*]] = icmp ult i64 [[TMP23]], [[TMP16]]
-; OPT-NEXT: br i1 [[TMP24]], label [[LOOP_MEMCPY_EXPANSION2]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5]]
-; OPT: loop-memcpy-residual4:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX6:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER5]] ], [ [[TMP29:%.*]], [[LOOP_MEMCPY_RESIDUAL4:%.*]] ]
-; OPT-NEXT: [[TMP25:%.*]] = add i64 [[TMP18]], [[RESIDUAL_LOOP_INDEX6]]
+; OPT-NEXT: br i1 [[TMP24]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT: loop-memcpy-residual:
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP29:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP25:%.*]] = add i64 [[TMP18]], [[RESIDUAL_LOOP_INDEX]]
; OPT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP25]]
; OPT-NEXT: [[TMP27:%.*]] = load i8, ptr addrspace(1) [[TMP26]], align 1
; OPT-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 [[TMP25]]
; OPT-NEXT: store i8 [[TMP27]], ptr addrspace(1) [[TMP28]], align 1
-; OPT-NEXT: [[TMP29]] = add i64 [[RESIDUAL_LOOP_INDEX6]], 1
+; OPT-NEXT: [[TMP29]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
; OPT-NEXT: [[TMP30:%.*]] = icmp ult i64 [[TMP29]], [[TMP17]]
-; OPT-NEXT: br i1 [[TMP30]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1:%.*]]
-; OPT: post-loop-memcpy-expansion1:
+; OPT-NEXT: br i1 [[TMP30]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP2]], 0
+; OPT-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP17]], 0
; OPT-NEXT: br i1 [[TMP31]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
; OPT: loop-memcpy-residual-header5:
-; OPT-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP17]], 0
+; OPT-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP32]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst0, ptr addrspace(1) %src, i64 %n, i1 false)
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %m, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst0, ptr addrspace(1) %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %m, i1 false)
ret void
}
@@ -341,7 +341,7 @@ define amdgpu_kernel void @memcpy_alt_type(ptr addrspace(1) %dst, ptr addrspace(
; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p3i8.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n, i1 false)
+ call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n, i1 false)
ret void
}
@@ -386,12 +386,12 @@ define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(ptr addrspac
; ALL-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0
; ALL-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; ALL: loop-memcpy-expansion:
-; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
+; ALL-NEXT: [[LOOP_INDEX1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX1]]
; ALL-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
-; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST0:%.*]], i64 [[LOOP_INDEX]]
+; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST0:%.*]], i64 [[LOOP_INDEX1]]
; ALL-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; ALL-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1
+; ALL-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX1]], 1
; ALL-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]]
; ALL-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; ALL: loop-memcpy-residual:
@@ -407,12 +407,12 @@ define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(ptr addrspac
; ALL: post-loop-memcpy-expansion:
; ALL-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; ALL: load-store-loop:
-; ALL-NEXT: [[LOOP_INDEX1:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP19:%.*]], [[LOAD_STORE_LOOP]] ]
-; ALL-NEXT: [[TMP16:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC]], i64 [[LOOP_INDEX1]]
+; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP19:%.*]], [[LOAD_STORE_LOOP]] ]
+; ALL-NEXT: [[TMP16:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC]], i64 [[LOOP_INDEX]]
; ALL-NEXT: [[TMP17:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP16]], align 1
-; ALL-NEXT: [[TMP18:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST1:%.*]], i64 [[LOOP_INDEX1]]
+; ALL-NEXT: [[TMP18:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST1:%.*]], i64 [[LOOP_INDEX]]
; ALL-NEXT: store <4 x i32> [[TMP17]], ptr addrspace(1) [[TMP18]], align 1
-; ALL-NEXT: [[TMP19]] = add i64 [[LOOP_INDEX1]], 1
+; ALL-NEXT: [[TMP19]] = add i64 [[LOOP_INDEX]], 1
; ALL-NEXT: [[TMP20:%.*]] = icmp ult i64 [[TMP19]], 6
; ALL-NEXT: br i1 [[TMP20]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; ALL: memcpy-split:
@@ -429,8 +429,8 @@ define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(ptr addrspac
; ALL-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP2]], 0
; ALL-NEXT: br i1 [[TMP27]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst0, ptr addrspace(1) %src, i64 %n, i1 false)
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 102, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst0, ptr addrspace(1) %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 102, i1 false)
ret void
}
@@ -453,7 +453,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1028(ptr addrspace
; OPT-NEXT: store i32 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1028, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1028, i1 false)
ret void
}
@@ -476,7 +476,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1025(ptr addrspace
; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1025, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1025, i1 false)
ret void
}
@@ -499,7 +499,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1026(ptr addrspace
; OPT-NEXT: store i16 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1026, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1026, i1 false)
ret void
}
@@ -522,7 +522,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1032(ptr addrspace
; OPT-NEXT: store i64 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1032, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1032, i1 false)
ret void
}
@@ -549,7 +549,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1034(ptr addrspace
; OPT-NEXT: store i16 [[TMP10]], ptr addrspace(1) [[TMP11]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1034, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1034, i1 false)
ret void
}
@@ -580,7 +580,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1035(ptr addrspace
; OPT-NEXT: store i8 [[TMP13]], ptr addrspace(1) [[TMP14]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1035, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1035, i1 false)
ret void
}
@@ -607,7 +607,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1036(ptr addrspace
; OPT-NEXT: store i32 [[TMP10]], ptr addrspace(1) [[TMP11]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1036, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1036, i1 false)
ret void
}
@@ -642,7 +642,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1039(ptr addrspace
; OPT-NEXT: store i8 [[TMP16]], ptr addrspace(1) [[TMP17]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1039, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1039, i1 false)
ret void
}
@@ -665,7 +665,7 @@ define amdgpu_kernel void @memcpy_global_align2_global_align2_1039(ptr addrspace
; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 %src, i64 1039, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 %src, i64 1039, i1 false)
ret void
}
@@ -692,7 +692,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1027(ptr addrspace
; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(1) [[TMP11]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1027, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1027, i1 false)
ret void
}
@@ -715,7 +715,7 @@ define amdgpu_kernel void @memcpy_global_align2_global_align4_1027(ptr addrspace
; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 4 %src, i64 1027, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 4 %src, i64 1027, i1 false)
ret void
}
@@ -738,7 +738,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align2_1027(ptr addrspace
; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 2 %src, i64 1027, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 2 %src, i64 1027, i1 false)
ret void
}
@@ -765,7 +765,7 @@ define amdgpu_kernel void @memcpy_private_align4_private_align4_1027(ptr addrspa
; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(5) [[TMP11]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false)
+ call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false)
ret void
}
@@ -788,7 +788,7 @@ define amdgpu_kernel void @memcpy_private_align2_private_align4_1027(ptr addrspa
; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(5) [[TMP8]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false)
+ call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false)
ret void
}
@@ -815,7 +815,7 @@ define amdgpu_kernel void @memcpy_private_align1_private_align4_1027(ptr addrspa
; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(5) [[TMP11]], align 1
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false)
+ call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false)
ret void
}
@@ -838,7 +838,7 @@ define amdgpu_kernel void @memcpy_private_align4_private_align2_1027(ptr addrspa
; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(5) [[TMP8]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 2 %src, i32 1027, i1 false)
+ call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 2 %src, i32 1027, i1 false)
ret void
}
@@ -865,7 +865,7 @@ define amdgpu_kernel void @memcpy_private_align4_private_align1_1027(ptr addrspa
; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(5) [[TMP11]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 1 %src, i32 1027, i1 false)
+ call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 1 %src, i32 1027, i1 false)
ret void
}
@@ -888,7 +888,7 @@ define amdgpu_kernel void @memcpy_private_align2_private_align2_1027(ptr addrspa
; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(5) [[TMP8]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 %src, i32 1027, i1 false)
+ call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 %src, i32 1027, i1 false)
ret void
}
@@ -924,7 +924,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_variable(ptr addrs
; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 %n, i1 false)
ret void
}
@@ -960,7 +960,7 @@ define amdgpu_kernel void @memcpy_global_align2_global_align2_variable(ptr addrs
; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 %src, i64 %n, i1 false)
ret void
}
@@ -996,7 +996,7 @@ define amdgpu_kernel void @memcpy_global_align1_global_align1_variable(ptr addrs
; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 %src, i64 %n, i1 false)
ret void
}
@@ -1032,7 +1032,7 @@ define amdgpu_kernel void @memcpy_local_align4_local_align4_variable(ptr addrspa
; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) align 4 %dst, ptr addrspace(3) align 4 %src, i32 %n, i1 false)
+ call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 4 %dst, ptr addrspace(3) align 4 %src, i32 %n, i1 false)
ret void
}
@@ -1068,7 +1068,7 @@ define amdgpu_kernel void @memcpy_local_align2_local_align2_variable(ptr addrspa
; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 %src, i32 %n, i1 false)
+ call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 %src, i32 %n, i1 false)
ret void
}
@@ -1104,7 +1104,7 @@ define amdgpu_kernel void @memcpy_local_align1_local_align1_variable(ptr addrspa
; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 %src, i32 %n, i1 false)
+ call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 %src, i32 %n, i1 false)
ret void
}
@@ -1140,7 +1140,7 @@ define amdgpu_kernel void @memcpy_local_align4_global_align4_variable(ptr addrsp
; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p3i8.p1i8.i32(ptr addrspace(3) align 4 %dst, ptr addrspace(1) align 4 %src, i32 %n, i1 false)
+ call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 %dst, ptr addrspace(1) align 4 %src, i32 %n, i1 false)
ret void
}
@@ -1176,7 +1176,7 @@ define amdgpu_kernel void @memcpy_global_align4_local_align4_variable(ptr addrsp
; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p3i8.i32(ptr addrspace(1) align 4 %dst, ptr addrspace(3) align 4 %src, i32 %n, i1 false)
+ call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %dst, ptr addrspace(3) align 4 %src, i32 %n, i1 false)
ret void
}
@@ -1199,7 +1199,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_16(ptr addrspace(1
; ALL: memcpy-split:
; ALL-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 16, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 16, i1 false)
ret void
}
@@ -1219,7 +1219,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_12(ptr addrspace(1
; ALL-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
; ALL-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 12, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 12, i1 false)
ret void
}
@@ -1235,7 +1235,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_8(ptr addrspace(1)
; ALL-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
; ALL-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 8, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 8, i1 false)
ret void
}
@@ -1255,7 +1255,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_10(ptr addrspace(1
; ALL-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
; ALL-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 10, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 10, i1 false)
ret void
}
@@ -1271,7 +1271,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_4(ptr addrspace(1)
; ALL-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
; ALL-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 4, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 4, i1 false)
ret void
}
@@ -1287,7 +1287,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_2(ptr addrspace(1)
; ALL-NEXT: store i16 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
; ALL-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 2, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 2, i1 false)
ret void
}
@@ -1303,7 +1303,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1(ptr addrspace(1)
; ALL-NEXT: store i8 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
; ALL-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1, i1 false)
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1, i1 false)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/mem-builtins.ll b/llvm/test/CodeGen/AMDGPU/mem-builtins.ll
index 5ae0ac5d2bd8..e1776ef20ffb 100644
--- a/llvm/test/CodeGen/AMDGPU/mem-builtins.ll
+++ b/llvm/test/CodeGen/AMDGPU/mem-builtins.ll
@@ -1,72 +1,72 @@
; RUN: not llc -march=r600 < %s 2>&1 | FileCheck -check-prefix=ERROR %s
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s
-declare hidden i32 @memcmp(i8 addrspace(1)* readonly nocapture, i8 addrspace(1)* readonly nocapture, i64) #0
-declare hidden i8 addrspace(1)* @memchr(i8 addrspace(1)* readonly nocapture, i32, i64) #1
-declare hidden i8* @strcpy(i8* nocapture, i8* readonly nocapture) #0
-declare hidden i32 @strlen(i8* nocapture) #1
-declare hidden i32 @strnlen(i8* nocapture, i32) #1
-declare hidden i32 @strcmp(i8* nocapture, i8* nocapture) #1
+declare hidden i32 @memcmp(ptr addrspace(1) readonly nocapture, ptr addrspace(1) readonly nocapture, i64) #0
+declare hidden ptr addrspace(1) @memchr(ptr addrspace(1) readonly nocapture, i32, i64) #1
+declare hidden ptr @strcpy(ptr nocapture, ptr readonly nocapture) #0
+declare hidden i32 @strlen(ptr nocapture) #1
+declare hidden i32 @strnlen(ptr nocapture, i32) #1
+declare hidden i32 @strcmp(ptr nocapture, ptr nocapture) #1
-; ERROR: error: <unknown>:0:0: in function test_memcmp void (i8 addrspace(1)*, i8 addrspace(1)*, i32*): unsupported call to function memcmp
+; ERROR: error: <unknown>:0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr): unsupported call to function memcmp
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@hi+12
-define amdgpu_kernel void @test_memcmp(i8 addrspace(1)* %x, i8 addrspace(1)* %y, i32* nocapture %p) #0 {
+define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p) #0 {
entry:
- %cmp = tail call i32 @memcmp(i8 addrspace(1)* %x, i8 addrspace(1)* %y, i64 2)
- store volatile i32 %cmp, i32 addrspace(1)* undef
+ %cmp = tail call i32 @memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, i64 2)
+ store volatile i32 %cmp, ptr addrspace(1) undef
ret void
}
-; ERROR: error: <unknown>:0:0: in function test_memchr void (i8 addrspace(1)*, i32, i64): unsupported call to function memchr
+; ERROR: error: <unknown>:0:0: in function test_memchr void (ptr addrspace(1), i32, i64): unsupported call to function memchr
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@hi+12
-define amdgpu_kernel void @test_memchr(i8 addrspace(1)* %src, i32 %char, i64 %len) #0 {
- %res = call i8 addrspace(1)* @memchr(i8 addrspace(1)* %src, i32 %char, i64 %len)
- store volatile i8 addrspace(1)* %res, i8 addrspace(1)* addrspace(1)* undef
+define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len) #0 {
+ %res = call ptr addrspace(1) @memchr(ptr addrspace(1) %src, i32 %char, i64 %len)
+ store volatile ptr addrspace(1) %res, ptr addrspace(1) undef
ret void
}
-; ERROR: error: <unknown>:0:0: in function test_strcpy void (i8*, i8*): unsupported call to function strcpy
+; ERROR: error: <unknown>:0:0: in function test_strcpy void (ptr, ptr): unsupported call to function strcpy
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@hi+12
-define amdgpu_kernel void @test_strcpy(i8* %dst, i8* %src) #0 {
- %res = call i8* @strcpy(i8* %dst, i8* %src)
- store volatile i8* %res, i8* addrspace(1)* undef
+define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src) #0 {
+ %res = call ptr @strcpy(ptr %dst, ptr %src)
+ store volatile ptr %res, ptr addrspace(1) undef
ret void
}
-; ERROR: error: <unknown>:0:0: in function test_strcmp void (i8*, i8*): unsupported call to function strcmp
+; ERROR: error: <unknown>:0:0: in function test_strcmp void (ptr, ptr): unsupported call to function strcmp
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@hi+12
-define amdgpu_kernel void @test_strcmp(i8* %src0, i8* %src1) #0 {
- %res = call i32 @strcmp(i8* %src0, i8* %src1)
- store volatile i32 %res, i32 addrspace(1)* undef
+define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1) #0 {
+ %res = call i32 @strcmp(ptr %src0, ptr %src1)
+ store volatile i32 %res, ptr addrspace(1) undef
ret void
}
-; ERROR: error: <unknown>:0:0: in function test_strlen void (i8*): unsupported call to function strlen
+; ERROR: error: <unknown>:0:0: in function test_strlen void (ptr): unsupported call to function strlen
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@hi+12
-define amdgpu_kernel void @test_strlen(i8* %src) #0 {
- %res = call i32 @strlen(i8* %src)
- store volatile i32 %res, i32 addrspace(1)* undef
+define amdgpu_kernel void @test_strlen(ptr %src) #0 {
+ %res = call i32 @strlen(ptr %src)
+ store volatile i32 %res, ptr addrspace(1) undef
ret void
}
-; ERROR: error: <unknown>:0:0: in function test_strnlen void (i8*, i32): unsupported call to function strnlen
+; ERROR: error: <unknown>:0:0: in function test_strnlen void (ptr, i32): unsupported call to function strnlen
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@hi+12
-define amdgpu_kernel void @test_strnlen(i8* %src, i32 %size) #0 {
- %res = call i32 @strnlen(i8* %src, i32 %size)
- store volatile i32 %res, i32 addrspace(1)* undef
+define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size) #0 {
+ %res = call i32 @strnlen(ptr %src, i32 %size)
+ store volatile i32 %res, ptr addrspace(1) undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll b/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll
index 3e1cb1ffb299..a5e0ceaa6b32 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+enable-flat-scratch < %s | FileCheck %s -check-prefix=FLATSCR
; Make sure there's no assertion from passing a 0 alignment value
-define void @memcpy_fixed_align(i8 addrspace(5)* %dst, i8 addrspace(1)* %src) {
+define void @memcpy_fixed_align(ptr addrspace(5) %dst, ptr addrspace(1) %src) {
; MUBUF-LABEL: memcpy_fixed_align:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -49,12 +49,11 @@ define void @memcpy_fixed_align(i8 addrspace(5)* %dst, i8 addrspace(1)* %src) {
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [40 x i8], addrspace(5)
- %cast = bitcast [40 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
- call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 4 dereferenceable(40) %cast, i8 addrspace(1)* align 4 dereferenceable(40) %src, i64 40, i1 false)
- call void asm sideeffect "; use $0", "s"([40 x i8] addrspace(5)* %alloca) #0
+ call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 dereferenceable(40) %alloca, ptr addrspace(1) align 4 dereferenceable(40) %src, i64 40, i1 false)
+ call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca) #0
ret void
}
-declare void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64, i1 immarg) #0
+declare void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #0
attributes #0 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-inline-fails.ll b/llvm/test/CodeGen/AMDGPU/memcpy-inline-fails.ll
index 88107ea23252..c1a0bfb48e5f 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-inline-fails.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-inline-fails.ll
@@ -2,10 +2,10 @@
; RUN: llc < %s -mtriple=r600-unknown-linux-gnu 2> %t.err || true
; RUN: FileCheck --input-file %t.err %s
-declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
-define void @test1(i8* %a, i8* %b) nounwind {
+define void @test1(ptr %a, ptr %b) nounwind {
; CHECK: LLVM ERROR
- tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 8, i1 0 )
+ tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %a, ptr %b, i64 8, i1 0 )
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll b/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll
index faf18d6c403c..ce3bd34cc1b4 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll
@@ -12,9 +12,9 @@
; MIR-DAG: ![[SET1:[0-9]+]] = !{![[SCOPE1]]}
; MIR-LABEL: name: test_memcpy
-; MIR: [[LOAD:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 %{{[0-9]+}}, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
-; MIR: GLOBAL_STORE_DWORDX4 %{{[0-9]+}}, killed [[LOAD]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
-define i32 @test_memcpy(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
+; MIR: [[LOAD:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 %{{[0-9]+}}, 16, 0, implicit $exec :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
+; MIR: GLOBAL_STORE_DWORDX4 %{{[0-9]+}}, killed [[LOAD]], 0, 0, implicit $exec :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
+define i32 @test_memcpy(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) {
; Check loads of %q are scheduled ahead of that store of the memcpy on %p.
; CHECK-LABEL: test_memcpy:
; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
@@ -22,21 +22,19 @@ define i32 @test_memcpy(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapt
; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: global_store_dwordx4 v[0:1], [[PVAL]], off
; CHECK: s_setpc_b64 s[30:31]
- %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
- %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4
- %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)*
- tail call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !2, !noalias !4
- %v0 = load i32, i32 addrspace(1)* %q, align 4, !alias.scope !4, !noalias !2
- %q1 = getelementptr inbounds i32, i32 addrspace(1)* %q, i64 1
- %v1 = load i32, i32 addrspace(1)* %q1, align 4, !alias.scope !4, !noalias !2
+ %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %p, i64 4
+ tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p, ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %add.ptr, i64 16, i1 false), !alias.scope !2, !noalias !4
+ %v0 = load i32, ptr addrspace(1) %q, align 4, !alias.scope !4, !noalias !2
+ %q1 = getelementptr inbounds i32, ptr addrspace(1) %q, i64 1
+ %v1 = load i32, ptr addrspace(1) %q1, align 4, !alias.scope !4, !noalias !2
%add = add i32 %v0, %v1
ret i32 %add
}
; MIR-LABEL: name: test_memcpy_inline
-; MIR: [[LOAD:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 %{{[0-9]+}}, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
-; MIR: GLOBAL_STORE_DWORDX4 %{{[0-9]+}}, killed [[LOAD]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
-define i32 @test_memcpy_inline(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
+; MIR: [[LOAD:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 %{{[0-9]+}}, 16, 0, implicit $exec :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
+; MIR: GLOBAL_STORE_DWORDX4 %{{[0-9]+}}, killed [[LOAD]], 0, 0, implicit $exec :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
+define i32 @test_memcpy_inline(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) {
; Check loads of %q are scheduled ahead of that store of the memcpy on %p.
; CHECK-LABEL: test_memcpy_inline:
; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
@@ -44,21 +42,19 @@ define i32 @test_memcpy_inline(i32 addrspace(1)* nocapture %p, i32 addrspace(1)*
; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: global_store_dwordx4 v[0:1], [[PVAL]], off
; CHECK: s_setpc_b64 s[30:31]
- %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
- %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4
- %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)*
- tail call void @llvm.memcpy.inline.p1i8.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !2, !noalias !4
- %v0 = load i32, i32 addrspace(1)* %q, align 4, !alias.scope !4, !noalias !2
- %q1 = getelementptr inbounds i32, i32 addrspace(1)* %q, i64 1
- %v1 = load i32, i32 addrspace(1)* %q1, align 4, !alias.scope !4, !noalias !2
+ %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %p, i64 4
+ tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p, ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %add.ptr, i64 16, i1 false), !alias.scope !2, !noalias !4
+ %v0 = load i32, ptr addrspace(1) %q, align 4, !alias.scope !4, !noalias !2
+ %q1 = getelementptr inbounds i32, ptr addrspace(1) %q, i64 1
+ %v1 = load i32, ptr addrspace(1) %q1, align 4, !alias.scope !4, !noalias !2
%add = add i32 %v0, %v1
ret i32 %add
}
; MIR-LABEL: name: test_memmove
-; MIR: [[LOAD:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 %{{[0-9]+}}, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
-; MIR: GLOBAL_STORE_DWORDX4 %{{[0-9]+}}, killed [[LOAD]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
-define i32 @test_memmove(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
+; MIR: [[LOAD:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 %{{[0-9]+}}, 16, 0, implicit $exec :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
+; MIR: GLOBAL_STORE_DWORDX4 %{{[0-9]+}}, killed [[LOAD]], 0, 0, implicit $exec :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
+define i32 @test_memmove(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) {
; Check loads of %q are scheduled ahead of that store of the memmove on %p.
; CHECK-LABEL: test_memmove:
; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
@@ -66,20 +62,18 @@ define i32 @test_memmove(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocap
; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: global_store_dwordx4 v[0:1], [[PVAL]]
; CHECK: s_setpc_b64 s[30:31]
- %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
- %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4
- %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)*
- tail call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !2, !noalias !4
- %v0 = load i32, i32 addrspace(1)* %q, align 4, !alias.scope !4, !noalias !2
- %q1 = getelementptr inbounds i32, i32 addrspace(1)* %q, i64 1
- %v1 = load i32, i32 addrspace(1)* %q1, align 4, !alias.scope !4, !noalias !2
+ %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %p, i64 4
+ tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p, ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %add.ptr, i64 16, i1 false), !alias.scope !2, !noalias !4
+ %v0 = load i32, ptr addrspace(1) %q, align 4, !alias.scope !4, !noalias !2
+ %q1 = getelementptr inbounds i32, ptr addrspace(1) %q, i64 1
+ %v1 = load i32, ptr addrspace(1) %q1, align 4, !alias.scope !4, !noalias !2
%add = add i32 %v0, %v1
ret i32 %add
}
; MIR-LABEL: name: test_memset
-; MIR: GLOBAL_STORE_DWORDX4 killed %{{[0-9]+}}, killed %{{[0-9]+}}, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
-define i32 @test_memset(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
+; MIR: GLOBAL_STORE_DWORDX4 killed %{{[0-9]+}}, killed %{{[0-9]+}}, 0, 0, implicit $exec :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
+define i32 @test_memset(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) {
; Check loads of %q are scheduled ahead of that store of the memset on %p.
; CHECK-LABEL: test_memset:
; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
@@ -87,19 +81,18 @@ define i32 @test_memset(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapt
; CHECK: global_store_dwordx4 v[0:1], v[[[PVAL]]{{:[0-9]+\]}}, off
; CHECK: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: s_setpc_b64 s[30:31]
- %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
- tail call void @llvm.memset.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 170, i64 16, i1 false), !alias.scope !2, !noalias !4
- %v0 = load i32, i32 addrspace(1)* %q, align 4, !alias.scope !4, !noalias !2
- %q1 = getelementptr inbounds i32, i32 addrspace(1)* %q, i64 1
- %v1 = load i32, i32 addrspace(1)* %q1, align 4, !alias.scope !4, !noalias !2
+ tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p, i8 170, i64 16, i1 false), !alias.scope !2, !noalias !4
+ %v0 = load i32, ptr addrspace(1) %q, align 4, !alias.scope !4, !noalias !2
+ %q1 = getelementptr inbounds i32, ptr addrspace(1) %q, i64 1
+ %v1 = load i32, ptr addrspace(1) %q1, align 4, !alias.scope !4, !noalias !2
%add = add i32 %v0, %v1
ret i32 %add
}
-declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64, i1 immarg)
-declare void @llvm.memcpy.inline.p1i8.p1i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64, i1 immarg)
-declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(1)* nocapture readonly, i64, i1 immarg)
-declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture writeonly, i8, i64, i1 immarg)
+declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg)
+declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg)
+declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg)
+declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg)
!0 = distinct !{!0, !"bax"}
!1 = distinct !{!1, !0, !"bax: %p"}