[llvm] r324101 - [AMDGPU] Switch to the new addr space mapping by default
Yaxun Liu via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 2 08:07:17 PST 2018
Author: yaxunl
Date: Fri Feb 2 08:07:16 2018
New Revision: 324101
URL: http://llvm.org/viewvc/llvm-project?rev=324101&view=rev
Log:
[AMDGPU] Switch to the new addr space mapping by default
This requires a corresponding clang change.
Differential Revision: https://reviews.llvm.org/D40955
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
llvm/trunk/test/CodeGen/AMDGPU/InlineAsmCrash.ll
llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll
llvm/trunk/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
llvm/trunk/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
llvm/trunk/test/CodeGen/AMDGPU/amdgpu-inline.ll
llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
llvm/trunk/test/CodeGen/AMDGPU/amdpal.ll
llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
llvm/trunk/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll
llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll
llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll
llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll
llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll
llvm/trunk/test/CodeGen/AMDGPU/commute-compares.ll
llvm/trunk/test/CodeGen/AMDGPU/copy-to-reg.ll
llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll
llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll
llvm/trunk/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll
llvm/trunk/test/CodeGen/AMDGPU/flat_atomics_i64.ll
llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
llvm/trunk/test/CodeGen/AMDGPU/function-args.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
llvm/trunk/test/CodeGen/AMDGPU/huge-private-buffer.ll
llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll
llvm/trunk/test/CodeGen/AMDGPU/insert_subreg.ll
llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir
llvm/trunk/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll
llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll
llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll
llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll
llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll
llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-offset.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll
llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-worklist.ll
llvm/trunk/test/CodeGen/AMDGPU/mubuf-offset-private.ll
llvm/trunk/test/CodeGen/AMDGPU/nested-calls.ll
llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll
llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll
llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll
llvm/trunk/test/CodeGen/AMDGPU/private-memory-atomics.ll
llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-globals.ll
llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
llvm/trunk/test/CodeGen/AMDGPU/r600.alu-limits.ll
llvm/trunk/test/CodeGen/AMDGPU/r600.private-memory.ll
llvm/trunk/test/CodeGen/AMDGPU/sad.ll
llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll
llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll
llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
llvm/trunk/test/CodeGen/AMDGPU/stack-size-overflow.ll
llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
llvm/trunk/test/CodeGen/AMDGPU/store-hi16.ll
llvm/trunk/test/CodeGen/AMDGPU/store-private.ll
llvm/trunk/test/CodeGen/AMDGPU/store-vector-ptrs.ll
llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll
llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll
llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir
llvm/trunk/test/CodeGen/AMDGPU/waitcnt-flat.ll
llvm/trunk/test/CodeGen/AMDGPU/waitcnt-looptest.ll
llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir
llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
llvm/trunk/test/DebugInfo/AMDGPU/code-pointer-size.ll
llvm/trunk/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll
llvm/trunk/test/DebugInfo/AMDGPU/pointer-address-space.ll
llvm/trunk/test/DebugInfo/AMDGPU/variable-locations.ll
llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
llvm/trunk/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll
llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Fri Feb 2 08:07:16 2018
@@ -260,24 +260,15 @@ GCNILPSchedRegistry("gcn-ilp",
static StringRef computeDataLayout(const Triple &TT) {
if (TT.getArch() == Triple::r600) {
// 32-bit pointers.
- if (TT.getEnvironmentName() == "amdgiz" ||
- TT.getEnvironmentName() == "amdgizcl")
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
- return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
- "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
}
// 32-bit private, local, and region pointers. 64-bit global, constant and
// flat.
- if (TT.getEnvironmentName() == "amdgiz" ||
- TT.getEnvironmentName() == "amdgizcl")
return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
- return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
- "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
- "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
}
LLVM_READNONE
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Fri Feb 2 08:07:16 2018
@@ -935,18 +935,10 @@ namespace llvm {
namespace AMDGPU {
AMDGPUAS getAMDGPUAS(Triple T) {
- auto Env = T.getEnvironmentName();
AMDGPUAS AS;
- if (Env == "amdgiz" || Env == "amdgizcl") {
- AS.FLAT_ADDRESS = 0;
- AS.PRIVATE_ADDRESS = 5;
- AS.REGION_ADDRESS = 4;
- }
- else {
- AS.FLAT_ADDRESS = 4;
- AS.PRIVATE_ADDRESS = 0;
- AS.REGION_ADDRESS = 5;
- }
+ AS.FLAT_ADDRESS = 0;
+ AS.PRIVATE_ADDRESS = 5;
+ AS.REGION_ADDRESS = 4;
return AS;
}
Modified: llvm/trunk/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/AMDGPU/addrspacecast.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/AMDGPU/addrspacecast.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/AMDGPU/addrspacecast.ll Fri Feb 2 08:07:16 2018
@@ -1,45 +1,45 @@
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck %s
; CHECK: 'addrspacecast_global_to_flat'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(4)*
-define i8 addrspace(4)* @addrspacecast_global_to_flat(i8 addrspace(1)* %ptr) #0 {
- %cast = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(4)*
- ret i8 addrspace(4)* %cast
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8 addrspace(1)* %ptr to i8*
+define i8* @addrspacecast_global_to_flat(i8 addrspace(1)* %ptr) #0 {
+ %cast = addrspacecast i8 addrspace(1)* %ptr to i8*
+ ret i8* %cast
}
; CHECK: 'addrspacecast_global_to_flat_v2'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8 addrspace(4)*>
-define <2 x i8 addrspace(4)*> @addrspacecast_global_to_flat_v2(<2 x i8 addrspace(1)*> %ptr) #0 {
- %cast = addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8 addrspace(4)*>
- ret <2 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8*>
+define <2 x i8*> @addrspacecast_global_to_flat_v2(<2 x i8 addrspace(1)*> %ptr) #0 {
+ %cast = addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8*>
+ ret <2 x i8*> %cast
}
; CHECK: 'addrspacecast_global_to_flat_v32'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8 addrspace(4)*>
-define <32 x i8 addrspace(4)*> @addrspacecast_global_to_flat_v32(<32 x i8 addrspace(1)*> %ptr) #0 {
- %cast = addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8 addrspace(4)*>
- ret <32 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8*>
+define <32 x i8*> @addrspacecast_global_to_flat_v32(<32 x i8 addrspace(1)*> %ptr) #0 {
+ %cast = addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8*>
+ ret <32 x i8*> %cast
}
; CHECK: 'addrspacecast_local_to_flat'
-; CHECK: estimated cost of 1 for {{.*}} addrspacecast i8 addrspace(3)* %ptr to i8 addrspace(4)*
-define i8 addrspace(4)* @addrspacecast_local_to_flat(i8 addrspace(3)* %ptr) #0 {
- %cast = addrspacecast i8 addrspace(3)* %ptr to i8 addrspace(4)*
- ret i8 addrspace(4)* %cast
+; CHECK: estimated cost of 1 for {{.*}} addrspacecast i8 addrspace(3)* %ptr to i8*
+define i8* @addrspacecast_local_to_flat(i8 addrspace(3)* %ptr) #0 {
+ %cast = addrspacecast i8 addrspace(3)* %ptr to i8*
+ ret i8* %cast
}
; CHECK: 'addrspacecast_local_to_flat_v2'
-; CHECK: estimated cost of 2 for {{.*}} addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8 addrspace(4)*>
-define <2 x i8 addrspace(4)*> @addrspacecast_local_to_flat_v2(<2 x i8 addrspace(3)*> %ptr) #0 {
- %cast = addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8 addrspace(4)*>
- ret <2 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 2 for {{.*}} addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8*>
+define <2 x i8*> @addrspacecast_local_to_flat_v2(<2 x i8 addrspace(3)*> %ptr) #0 {
+ %cast = addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8*>
+ ret <2 x i8*> %cast
}
; CHECK: 'addrspacecast_local_to_flat_v32'
-; CHECK: estimated cost of 32 for {{.*}} addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8 addrspace(4)*>
-define <32 x i8 addrspace(4)*> @addrspacecast_local_to_flat_v32(<32 x i8 addrspace(3)*> %ptr) #0 {
- %cast = addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8 addrspace(4)*>
- ret <32 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 32 for {{.*}} addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8*>
+define <32 x i8*> @addrspacecast_local_to_flat_v32(<32 x i8 addrspace(3)*> %ptr) #0 {
+ %cast = addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8*>
+ ret <32 x i8*> %cast
}
attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/InlineAsmCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/InlineAsmCrash.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/InlineAsmCrash.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/InlineAsmCrash.ll Fri Feb 2 08:07:16 2018
@@ -4,9 +4,9 @@
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: ;;#ASMEND
-define void @foo(i32* %ptr) {
+define void @foo(i32 addrspace(5)* %ptr) {
%tmp = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm "s_nop 0", "=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65"(i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2)
%tmp2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %tmp, 0
- store i32 %tmp2, i32* %ptr, align 4
+ store i32 %tmp2, i32 addrspace(5)* %ptr, align 4
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll Fri Feb 2 08:07:16 2018
@@ -35,8 +35,8 @@
; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
+ store volatile i32 7, i32* %stof
ret void
}
@@ -73,9 +73,9 @@ define amdgpu_kernel void @use_group_to_
; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
-define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
- %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %stof
+define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #0 {
+ %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
+ store volatile i32 7, i32* %stof
ret void
}
@@ -89,8 +89,8 @@ define amdgpu_kernel void @use_private_t
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
+ store volatile i32 7, i32* %stof
ret void
}
@@ -101,8 +101,8 @@ define amdgpu_kernel void @use_global_to
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
- %ld = load volatile i32, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(2)* %ptr to i32*
+ %ld = load volatile i32, i32* %stof
ret void
}
@@ -117,8 +117,8 @@ define amdgpu_kernel void @use_constant_
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: ds_write_b32 [[CASTPTR]], v[[K]]
-define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
+define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
store volatile i32 0, i32 addrspace(3)* %ftos
ret void
}
@@ -134,9 +134,9 @@ define amdgpu_kernel void @use_flat_to_g
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
-define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
- store volatile i32 0, i32* %ftos
+define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
+ store volatile i32 0, i32 addrspace(5)* %ftos
ret void
}
@@ -148,8 +148,8 @@ define amdgpu_kernel void @use_flat_to_p
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
; HSA: {{flat|global}}_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
-define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
+define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
store volatile i32 0, i32 addrspace(1)* %ftos
ret void
}
@@ -159,8 +159,8 @@ define amdgpu_kernel void @use_flat_to_g
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
-define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
+define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(2)*
load volatile i32, i32 addrspace(2)* %ftos
ret void
}
@@ -178,8 +178,8 @@ define amdgpu_kernel void @use_flat_to_c
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %cast
+ %cast = addrspacecast i32 addrspace(3)* null to i32*
+ store volatile i32 7, i32* %cast
ret void
}
@@ -188,7 +188,7 @@ define amdgpu_kernel void @cast_0_group_
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(3)*
+ %cast = addrspacecast i32* null to i32 addrspace(3)*
store volatile i32 7, i32 addrspace(3)* %cast
ret void
}
@@ -199,8 +199,8 @@ define amdgpu_kernel void @cast_0_flat_t
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %cast
+ %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32*
+ store volatile i32 7, i32* %cast
ret void
}
@@ -209,7 +209,7 @@ define amdgpu_kernel void @cast_neg1_gro
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(4)* inttoptr (i64 -1 to i32 addrspace(4)*) to i32 addrspace(3)*
+ %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(3)*
store volatile i32 7, i32 addrspace(3)* %cast
ret void
}
@@ -224,8 +224,8 @@ define amdgpu_kernel void @cast_neg1_fla
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
- %cast = addrspacecast i32* null to i32 addrspace(4)*
- store volatile i32 7, i32 addrspace(4)* %cast
+ %cast = addrspacecast i32 addrspace(5)* null to i32*
+ store volatile i32 7, i32* %cast
ret void
}
@@ -233,8 +233,8 @@ define amdgpu_kernel void @cast_0_privat
; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)*
- store volatile i32 7, i32* %cast
+ %cast = addrspacecast i32* null to i32 addrspace(5)*
+ store volatile i32 7, i32 addrspace(5)* %cast
ret void
}
@@ -250,17 +250,17 @@ entry:
br i1 %cmp, label %local, label %global
local:
- %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
+ %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32*
br label %end
global:
- %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+ %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32*
br label %end
end:
- %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
- store volatile i32 %x, i32 addrspace(4)* %fptr, align 4
-; %val = load i32, i32 addrspace(4)* %fptr, align 4
+ %fptr = phi i32* [ %flat_local, %local ], [ %flat_global, %global ]
+ store volatile i32 %x, i32* %fptr, align 4
+; %val = load i32, i32* %fptr, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
@@ -278,14 +278,14 @@ end:
; HSA: s_barrier
; HSA: {{flat|global}}_load_dword
define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
- %alloca = alloca i32, i32 9, align 4
+ %alloca = alloca i32, i32 9, align 4, addrspace(5)
%x = call i32 @llvm.amdgcn.workitem.id.x() #2
- %pptr = getelementptr i32, i32* %alloca, i32 %x
- %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
- store volatile i32 %x, i32 addrspace(4)* %fptr
+ %pptr = getelementptr i32, i32 addrspace(5)* %alloca, i32 %x
+ %fptr = addrspacecast i32 addrspace(5)* %pptr to i32*
+ store volatile i32 %x, i32* %fptr
; Dummy call
call void @llvm.amdgcn.s.barrier() #1
- %reload = load volatile i32, i32 addrspace(4)* %fptr, align 4
+ %reload = load volatile i32, i32* %fptr, align 4
store volatile i32 %reload, i32 addrspace(1)* %out, align 4
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgcn.private-memory.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgcn.private-memory.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgcn.private-memory.ll Fri Feb 2 08:07:16 2018
@@ -17,13 +17,13 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
- %0 = alloca [2 x i32]
- %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1
- store i32 0, i32* %1
- store i32 1, i32* %2
- %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in
- %4 = load i32, i32* %3
+ %0 = alloca [2 x i32], addrspace(5)
+ %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
+ %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %1
+ store i32 1, i32 addrspace(5)* %2
+ %3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in
+ %4 = load i32, i32 addrspace(5)* %3
%5 = call i32 @llvm.amdgcn.workitem.id.x()
%6 = add i32 %4, %5
store i32 %6, i32 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll Fri Feb 2 08:07:16 2018
@@ -1,9 +1,9 @@
; RUN: opt -mtriple=amdgcn-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
; RUN: opt -mtriple=r600-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
-; CHECK: NoAlias: i8 addrspace(1)* %p1, i8* %p
+; CHECK: NoAlias: i8 addrspace(1)* %p1, i8 addrspace(5)* %p
-define void @test(i8* %p, i8 addrspace(1)* %p1) {
+define void @test(i8 addrspace(5)* %p, i8 addrspace(1)* %p1) {
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgpu-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu-inline.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgpu-inline.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu-inline.ll Fri Feb 2 08:07:16 2018
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=amdgcn--amdhsa -O3 -S -amdgpu-function-calls -inline-threshold=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INL1 %s
-; RUN: opt -mtriple=amdgcn--amdhsa -O3 -S -amdgpu-function-calls < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INLDEF %s
+; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -amdgpu-function-calls -inline-threshold=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INL1 %s
+; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -amdgpu-function-calls < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INLDEF %s
define coldcc float @foo(float %x, float %y) {
entry:
@@ -10,30 +10,30 @@ entry:
ret float %cond
}
-define coldcc void @foo_private_ptr(float* nocapture %p) {
+define coldcc void @foo_private_ptr(float addrspace(5)* nocapture %p) {
entry:
- %tmp1 = load float, float* %p, align 4
+ %tmp1 = load float, float addrspace(5)* %p, align 4
%cmp = fcmp ogt float %tmp1, 1.000000e+00
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
%div = fdiv float 1.000000e+00, %tmp1
- store float %div, float* %p, align 4
+ store float %div, float addrspace(5)* %p, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
ret void
}
-define coldcc void @foo_private_ptr2(float* nocapture %p1, float* nocapture %p2) {
+define coldcc void @foo_private_ptr2(float addrspace(5)* nocapture %p1, float addrspace(5)* nocapture %p2) {
entry:
- %tmp1 = load float, float* %p1, align 4
+ %tmp1 = load float, float addrspace(5)* %p1, align 4
%cmp = fcmp ogt float %tmp1, 1.000000e+00
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
%div = fdiv float 2.000000e+00, %tmp1
- store float %div, float* %p2, align 4
+ store float %div, float addrspace(5)* %p2, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
@@ -46,11 +46,11 @@ bb:
ret float %call
}
-define void @foo_noinline(float* nocapture %p) #0 {
+define void @foo_noinline(float addrspace(5)* nocapture %p) #0 {
entry:
- %tmp1 = load float, float* %p, align 4
+ %tmp1 = load float, float addrspace(5)* %p, align 4
%mul = fmul float %tmp1, 2.000000e+00
- store float %mul, float* %p, align 4
+ store float %mul, float addrspace(5)* %p, align 4
ret void
}
@@ -63,7 +63,7 @@ entry:
; GCN: tail call float @_Z3sinf(
define amdgpu_kernel void @test_inliner(float addrspace(1)* nocapture %a, i32 %n) {
entry:
- %pvt_arr = alloca [64 x float], align 4
+ %pvt_arr = alloca [64 x float], align 4, addrspace(5)
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid
%tmp2 = load float, float addrspace(1)* %arrayidx, align 4
@@ -72,22 +72,22 @@ entry:
%tmp5 = load float, float addrspace(1)* %arrayidx2, align 4
%c1 = tail call coldcc float @foo(float %tmp2, float %tmp5)
%or = or i32 %tid, %n
- %arrayidx5 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 %or
- store float %c1, float* %arrayidx5, align 4
- %arrayidx7 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 %or
- call coldcc void @foo_private_ptr(float* %arrayidx7)
- %arrayidx8 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 1
- %arrayidx9 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 2
- call coldcc void @foo_private_ptr2(float* %arrayidx8, float* %arrayidx9)
- call void @foo_noinline(float* %arrayidx7)
+ %arrayidx5 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %or
+ store float %c1, float addrspace(5)* %arrayidx5, align 4
+ %arrayidx7 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %or
+ call coldcc void @foo_private_ptr(float addrspace(5)* %arrayidx7)
+ %arrayidx8 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 1
+ %arrayidx9 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 2
+ call coldcc void @foo_private_ptr2(float addrspace(5)* %arrayidx8, float addrspace(5)* %arrayidx9)
+ call void @foo_noinline(float addrspace(5)* %arrayidx7)
%and = and i32 %tid, %n
- %arrayidx11 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 %and
- %tmp12 = load float, float* %arrayidx11, align 4
+ %arrayidx11 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %and
+ %tmp12 = load float, float addrspace(5)* %arrayidx11, align 4
%c2 = call coldcc float @sin_wrapper(float %tmp12)
- store float %c2, float* %arrayidx7, align 4
+ store float %c2, float addrspace(5)* %arrayidx7, align 4
%xor = xor i32 %tid, %n
- %arrayidx16 = getelementptr inbounds [64 x float], [64 x float]* %pvt_arr, i32 0, i32 %xor
- %tmp16 = load float, float* %arrayidx16, align 4
+ %arrayidx16 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %xor
+ %tmp16 = load float, float addrspace(5)* %arrayidx16, align 4
store float %tmp16, float addrspace(1)* %arrayidx, align 4
ret void
}
@@ -96,23 +96,23 @@ entry:
; GCN: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i
define amdgpu_kernel void @test_inliner_multi_pvt_ptr(float addrspace(1)* nocapture %a, i32 %n, float %v) {
entry:
- %pvt_arr1 = alloca [32 x float], align 4
- %pvt_arr2 = alloca [32 x float], align 4
+ %pvt_arr1 = alloca [32 x float], align 4, addrspace(5)
+ %pvt_arr2 = alloca [32 x float], align 4, addrspace(5)
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid
%or = or i32 %tid, %n
- %arrayidx4 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 %or
- %arrayidx5 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr2, i32 0, i32 %or
- store float %v, float* %arrayidx4, align 4
- store float %v, float* %arrayidx5, align 4
- %arrayidx8 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 1
- %arrayidx9 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr2, i32 0, i32 2
- call coldcc void @foo_private_ptr2(float* %arrayidx8, float* %arrayidx9)
+ %arrayidx4 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %or
+ %arrayidx5 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %or
+ store float %v, float addrspace(5)* %arrayidx4, align 4
+ store float %v, float addrspace(5)* %arrayidx5, align 4
+ %arrayidx8 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 1
+ %arrayidx9 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr2, i32 0, i32 2
+ call coldcc void @foo_private_ptr2(float addrspace(5)* %arrayidx8, float addrspace(5)* %arrayidx9)
%xor = xor i32 %tid, %n
- %arrayidx15 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 %xor
- %arrayidx16 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr2, i32 0, i32 %xor
- %tmp15 = load float, float* %arrayidx15, align 4
- %tmp16 = load float, float* %arrayidx16, align 4
+ %arrayidx15 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %xor
+ %arrayidx16 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %xor
+ %tmp15 = load float, float addrspace(5)* %arrayidx15, align 4
+ %tmp16 = load float, float addrspace(5)* %arrayidx16, align 4
%tmp17 = fadd float %tmp15, %tmp16
store float %tmp17, float addrspace(1)* %arrayidx, align 4
ret void
@@ -123,23 +123,23 @@ entry:
; GCN-INLDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i
define amdgpu_kernel void @test_inliner_multi_pvt_ptr_cutoff(float addrspace(1)* nocapture %a, i32 %n, float %v) {
entry:
- %pvt_arr1 = alloca [32 x float], align 4
- %pvt_arr2 = alloca [33 x float], align 4
+ %pvt_arr1 = alloca [32 x float], align 4, addrspace(5)
+ %pvt_arr2 = alloca [33 x float], align 4, addrspace(5)
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid
%or = or i32 %tid, %n
- %arrayidx4 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 %or
- %arrayidx5 = getelementptr inbounds [33 x float], [33 x float]* %pvt_arr2, i32 0, i32 %or
- store float %v, float* %arrayidx4, align 4
- store float %v, float* %arrayidx5, align 4
- %arrayidx8 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 1
- %arrayidx9 = getelementptr inbounds [33 x float], [33 x float]* %pvt_arr2, i32 0, i32 2
- call coldcc void @foo_private_ptr2(float* %arrayidx8, float* %arrayidx9)
+ %arrayidx4 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %or
+ %arrayidx5 = getelementptr inbounds [33 x float], [33 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %or
+ store float %v, float addrspace(5)* %arrayidx4, align 4
+ store float %v, float addrspace(5)* %arrayidx5, align 4
+ %arrayidx8 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 1
+ %arrayidx9 = getelementptr inbounds [33 x float], [33 x float] addrspace(5)* %pvt_arr2, i32 0, i32 2
+ call coldcc void @foo_private_ptr2(float addrspace(5)* %arrayidx8, float addrspace(5)* %arrayidx9)
%xor = xor i32 %tid, %n
- %arrayidx15 = getelementptr inbounds [32 x float], [32 x float]* %pvt_arr1, i32 0, i32 %xor
- %arrayidx16 = getelementptr inbounds [33 x float], [33 x float]* %pvt_arr2, i32 0, i32 %xor
- %tmp15 = load float, float* %arrayidx15, align 4
- %tmp16 = load float, float* %arrayidx16, align 4
+ %arrayidx15 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %xor
+ %arrayidx16 = getelementptr inbounds [33 x float], [33 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %xor
+ %tmp15 = load float, float addrspace(5)* %arrayidx15, align 4
+ %tmp16 = load float, float addrspace(5)* %arrayidx16, align 4
%tmp17 = fadd float %tmp15, %tmp16
store float %tmp17, float addrspace(1)* %arrayidx, align 4
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll Fri Feb 2 08:07:16 2018
@@ -5,8 +5,8 @@
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
-; RUN: opt -S -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
@@ -80,19 +80,19 @@
; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !1
define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13
ret void
@@ -102,19 +102,19 @@ entry:
; OPT: getelementptr inbounds [256 x [8 x i32]], [256 x [8 x i32]] addrspace(3)* @high_alignment.stack, i32 0, i32 %{{[0-9]+}}
define amdgpu_kernel void @high_alignment(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
- %stack = alloca [8 x i32], align 16
+ %stack = alloca [8 x i32], align 16, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 %0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 %0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 %1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 0
- %2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 %1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 1
- %3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13
ret void
@@ -127,19 +127,19 @@ entry:
; SI-NOT: ds_write
define amdgpu_kernel void @no_replace_inbounds_gep(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13
ret void
@@ -162,20 +162,20 @@ entry:
define amdgpu_kernel void @multiple_structs(i32 addrspace(1)* %out) #0 {
entry:
- %a = alloca %struct.point
- %b = alloca %struct.point
- %a.x.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
- %a.y.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 1
- %b.x.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
- %b.y.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 1
- store i32 0, i32* %a.x.ptr
- store i32 1, i32* %a.y.ptr
- store i32 2, i32* %b.x.ptr
- store i32 3, i32* %b.y.ptr
- %a.indirect.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
- %b.indirect.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
- %a.indirect = load i32, i32* %a.indirect.ptr
- %b.indirect = load i32, i32* %b.indirect.ptr
+ %a = alloca %struct.point, addrspace(5)
+ %b = alloca %struct.point, addrspace(5)
+ %a.x.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0
+ %a.y.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 1
+ %b.x.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0
+ %b.y.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %a.x.ptr
+ store i32 1, i32 addrspace(5)* %a.y.ptr
+ store i32 2, i32 addrspace(5)* %b.x.ptr
+ store i32 3, i32 addrspace(5)* %b.y.ptr
+ %a.indirect.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0
+ %b.indirect.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0
+ %a.indirect = load i32, i32 addrspace(5)* %a.indirect.ptr
+ %b.indirect = load i32, i32 addrspace(5)* %b.indirect.ptr
%0 = add i32 %a.indirect, %b.indirect
store i32 %0, i32 addrspace(1)* %out
ret void
@@ -191,32 +191,32 @@ entry:
define amdgpu_kernel void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
entry:
- %prv_array_const = alloca [2 x i32]
- %prv_array = alloca [2 x i32]
+ %prv_array_const = alloca [2 x i32], addrspace(5)
+ %prv_array = alloca [2 x i32], addrspace(5)
%a = load i32, i32 addrspace(1)* %in
%b_src_ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%b = load i32, i32 addrspace(1)* %b_src_ptr
- %a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
- store i32 %a, i32* %a_dst_ptr
- %b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1
- store i32 %b, i32* %b_dst_ptr
+ %a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0
+ store i32 %a, i32 addrspace(5)* %a_dst_ptr
+ %b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 1
+ store i32 %b, i32 addrspace(5)* %b_dst_ptr
br label %for.body
for.body:
%inc = phi i32 [0, %entry], [%count, %for.body]
- %x_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
- %x = load i32, i32* %x_ptr
- %y_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
- %y = load i32, i32* %y_ptr
+ %x_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0
+ %x = load i32, i32 addrspace(5)* %x_ptr
+ %y_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0
+ %y = load i32, i32 addrspace(5)* %y_ptr
%xy = add i32 %x, %y
- store i32 %xy, i32* %y_ptr
+ store i32 %xy, i32 addrspace(5)* %y_ptr
%count = add i32 %inc, 1
%done = icmp eq i32 %count, 4095
br i1 %done, label %for.end, label %for.body
for.end:
- %value_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
- %value = load i32, i32* %value_ptr
+ %value_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0
+ %value = load i32, i32 addrspace(5)* %value_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
@@ -235,13 +235,13 @@ for.end:
; SI-PROMOTE: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[SCALED_IDX]], 16
define amdgpu_kernel void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %0 = alloca [2 x i16]
- %1 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 0
- %2 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 1
- store i16 0, i16* %1
- store i16 1, i16* %2
- %3 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 %index
- %4 = load i16, i16* %3
+ %0 = alloca [2 x i16], addrspace(5)
+ %1 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 0
+ %2 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 1
+ store i16 0, i16 addrspace(5)* %1
+ store i16 1, i16 addrspace(5)* %2
+ %3 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 %index
+ %4 = load i16, i16 addrspace(5)* %3
%5 = sext i16 %4 to i32
store i32 %5, i32 addrspace(1)* %out
ret void
@@ -258,13 +258,13 @@ entry:
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:5 ; encoding: [0x05,0x00,0x60,0xe0
define amdgpu_kernel void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %0 = alloca [2 x i8]
- %1 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 0
- %2 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 1
- store i8 0, i8* %1
- store i8 1, i8* %2
- %3 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 %index
- %4 = load i8, i8* %3
+ %0 = alloca [2 x i8], addrspace(5)
+ %1 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 0
+ %2 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 1
+ store i8 0, i8 addrspace(5)* %1
+ store i8 1, i8 addrspace(5)* %2
+ %3 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 %index
+ %4 = load i8, i8 addrspace(5)* %3
%5 = sext i8 %4 to i32
store i32 %5, i32 addrspace(1)* %out
ret void
@@ -281,22 +281,22 @@ entry:
; SI: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ;
define amdgpu_kernel void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
- %0 = alloca [3 x i8], align 1
- %1 = alloca [2 x i8], align 1
- %2 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 0
- %3 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 1
- %4 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 2
- %5 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 0
- %6 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 1
- store i8 0, i8* %2
- store i8 1, i8* %3
- store i8 2, i8* %4
- store i8 1, i8* %5
- store i8 0, i8* %6
- %7 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 %in
- %8 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 %in
- %9 = load i8, i8* %7
- %10 = load i8, i8* %8
+ %0 = alloca [3 x i8], align 1, addrspace(5)
+ %1 = alloca [2 x i8], align 1, addrspace(5)
+ %2 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 0
+ %3 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 1
+ %4 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 2
+ %5 = getelementptr [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 0
+ %6 = getelementptr [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 1
+ store i8 0, i8 addrspace(5)* %2
+ store i8 1, i8 addrspace(5)* %3
+ store i8 2, i8 addrspace(5)* %4
+ store i8 1, i8 addrspace(5)* %5
+ store i8 0, i8 addrspace(5)* %6
+ %7 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 %in
+ %8 = getelementptr [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 %in
+ %9 = load i8, i8 addrspace(5)* %7
+ %10 = load i8, i8 addrspace(5)* %8
%11 = add i8 %9, %10
%12 = sext i8 %11 to i32
store i32 %12, i32 addrspace(1)* %out
@@ -305,13 +305,13 @@ entry:
define amdgpu_kernel void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x [2 x i8]]
- %gep0 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
- store i8 0, i8* %gep0
- store i8 1, i8* %gep1
- %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i8, i8* %gep2
+ %alloca = alloca [2 x [2 x i8]], addrspace(5)
+ %gep0 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ store i8 0, i8 addrspace(5)* %gep0
+ store i8 1, i8 addrspace(5)* %gep1
+ %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
+ %load = load i8, i8 addrspace(5)* %gep2
%sext = sext i8 %load to i32
store i32 %sext, i32 addrspace(1)* %out
ret void
@@ -319,26 +319,26 @@ entry:
define amdgpu_kernel void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x [2 x i32]]
- %gep0 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i32, i32* %gep2
+ %alloca = alloca [2 x [2 x i32]], addrspace(5)
+ %gep0 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %gep0
+ store i32 1, i32 addrspace(5)* %gep1
+ %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
+ %load = load i32, i32 addrspace(5)* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
define amdgpu_kernel void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x [2 x i64]]
- %gep0 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
- store i64 0, i64* %gep0
- store i64 1, i64* %gep1
- %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i64, i64* %gep2
+ %alloca = alloca [2 x [2 x i64]], addrspace(5)
+ %gep0 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ store i64 0, i64 addrspace(5)* %gep0
+ store i64 1, i64 addrspace(5)* %gep1
+ %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
+ %load = load i64, i64 addrspace(5)* %gep2
store i64 %load, i64 addrspace(1)* %out
ret void
}
@@ -347,40 +347,40 @@ entry:
define amdgpu_kernel void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x [2 x %struct.pair32]]
- %gep0 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
- %gep1 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
- %load = load i32, i32* %gep2
+ %alloca = alloca [2 x [2 x %struct.pair32]], addrspace(5)
+ %gep0 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0, i32 1
+ %gep1 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1, i32 1
+ store i32 0, i32 addrspace(5)* %gep0
+ store i32 1, i32 addrspace(5)* %gep1
+ %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index, i32 0
+ %load = load i32, i32 addrspace(5)* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
define amdgpu_kernel void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x %struct.pair32]
- %gep0 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
- %gep1 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
- %load = load i32, i32* %gep2
+ %alloca = alloca [2 x %struct.pair32], addrspace(5)
+ %gep0 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ %gep1 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 1, i32 0
+ store i32 0, i32 addrspace(5)* %gep0
+ store i32 1, i32 addrspace(5)* %gep1
+ %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 %index, i32 0
+ %load = load i32, i32 addrspace(5)* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
define amdgpu_kernel void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
+ %tmp = alloca [2 x i32], addrspace(5)
+ %tmp1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %tmp1
+ store i32 1, i32 addrspace(5)* %tmp2
%cmp = icmp eq i32 %in, 0
- %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
- %load = load i32, i32* %sel
+ %sel = select i1 %cmp, i32 addrspace(5)* %tmp1, i32 addrspace(5)* %tmp2
+ %load = load i32, i32 addrspace(5)* %sel
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -394,14 +394,14 @@ entry:
; SI: v_add_{{[iu]}}32_e32 [[ADD_OFFSET:v[0-9]+]], vcc, 5,
; SI: buffer_load_dword v{{[0-9]+}}, [[ADD_OFFSET:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
- %alloca = alloca [16 x i32]
- %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
- store i32 5, i32* %tmp0
- %tmp1 = ptrtoint [16 x i32]* %alloca to i32
+ %alloca = alloca [16 x i32], addrspace(5)
+ %tmp0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
+ store i32 5, i32 addrspace(5)* %tmp0
+ %tmp1 = ptrtoint [16 x i32] addrspace(5)* %alloca to i32
%tmp2 = add i32 %tmp1, 5
- %tmp3 = inttoptr i32 %tmp2 to i32*
- %tmp4 = getelementptr i32, i32* %tmp3, i32 %b
- %tmp5 = load i32, i32* %tmp4
+ %tmp3 = inttoptr i32 %tmp2 to i32 addrspace(5)*
+ %tmp4 = getelementptr i32, i32 addrspace(5)* %tmp3, i32 %b
+ %tmp5 = load i32, i32 addrspace(5)* %tmp4
store i32 %tmp5, i32 addrspace(1)* %out
ret void
}
@@ -411,15 +411,15 @@ define amdgpu_kernel void @ptrtoint(i32
; OPT: load i32 addrspace(1)*, i32 addrspace(1)* addrspace(3)* %{{[0-9]+}}, align 4
define amdgpu_kernel void @pointer_typed_alloca(i32 addrspace(1)* %A) {
entry:
- %A.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
- %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+ %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
+ %ld0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
store i32 1, i32 addrspace(1)* %arrayidx, align 4
- %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+ %ld1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
store i32 2, i32 addrspace(1)* %arrayidx1, align 4
- %ld2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+ %ld2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %ld2, i32 2
store i32 3, i32 addrspace(1)* %arrayidx2, align 4
ret void
@@ -462,9 +462,9 @@ entry:
; SI: buffer_load_dword
define amdgpu_kernel void @v16i32_stack(<16 x i32> addrspace(1)* %out, i32 %a) {
- %alloca = alloca [2 x <16 x i32>]
- %tmp0 = getelementptr [2 x <16 x i32>], [2 x <16 x i32>]* %alloca, i32 0, i32 %a
- %tmp5 = load <16 x i32>, <16 x i32>* %tmp0
+ %alloca = alloca [2 x <16 x i32>], addrspace(5)
+ %tmp0 = getelementptr [2 x <16 x i32>], [2 x <16 x i32>] addrspace(5)* %alloca, i32 0, i32 %a
+ %tmp5 = load <16 x i32>, <16 x i32> addrspace(5)* %tmp0
store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out
ret void
}
@@ -506,9 +506,9 @@ define amdgpu_kernel void @v16i32_stack(
; SI: buffer_load_dword
define amdgpu_kernel void @v16float_stack(<16 x float> addrspace(1)* %out, i32 %a) {
- %alloca = alloca [2 x <16 x float>]
- %tmp0 = getelementptr [2 x <16 x float>], [2 x <16 x float>]* %alloca, i32 0, i32 %a
- %tmp5 = load <16 x float>, <16 x float>* %tmp0
+ %alloca = alloca [2 x <16 x float>], addrspace(5)
+ %tmp0 = getelementptr [2 x <16 x float>], [2 x <16 x float>] addrspace(5)* %alloca, i32 0, i32 %a
+ %tmp5 = load <16 x float>, <16 x float> addrspace(5)* %tmp0
store <16 x float> %tmp5, <16 x float> addrspace(1)* %out
ret void
}
@@ -522,9 +522,9 @@ define amdgpu_kernel void @v16float_stac
; SI: buffer_load_dword
define amdgpu_kernel void @v2float_stack(<2 x float> addrspace(1)* %out, i32 %a) {
- %alloca = alloca [16 x <2 x float>]
- %tmp0 = getelementptr [16 x <2 x float>], [16 x <2 x float>]* %alloca, i32 0, i32 %a
- %tmp5 = load <2 x float>, <2 x float>* %tmp0
+ %alloca = alloca [16 x <2 x float>], addrspace(5)
+ %tmp0 = getelementptr [16 x <2 x float>], [16 x <2 x float>] addrspace(5)* %alloca, i32 0, i32 %a
+ %tmp5 = load <2 x float>, <2 x float> addrspace(5)* %tmp0
store <2 x float> %tmp5, <2 x float> addrspace(1)* %out
ret void
}
@@ -534,9 +534,9 @@ define amdgpu_kernel void @v2float_stack
; OPT: load [0 x i32], [0 x i32] addrspace(3)*
define amdgpu_kernel void @direct_alloca_read_0xi32([0 x i32] addrspace(1)* %out, i32 %index) {
entry:
- %tmp = alloca [0 x i32]
- store [0 x i32] [], [0 x i32]* %tmp
- %load = load [0 x i32], [0 x i32]* %tmp
+ %tmp = alloca [0 x i32], addrspace(5)
+ store [0 x i32] [], [0 x i32] addrspace(5)* %tmp
+ %load = load [0 x i32], [0 x i32] addrspace(5)* %tmp
store [0 x i32] %load, [0 x i32] addrspace(1)* %out
ret void
}
@@ -546,9 +546,9 @@ entry:
; OPT: load [1 x i32], [1 x i32] addrspace(3)*
define amdgpu_kernel void @direct_alloca_read_1xi32([1 x i32] addrspace(1)* %out, i32 %index) {
entry:
- %tmp = alloca [1 x i32]
- store [1 x i32] [i32 0], [1 x i32]* %tmp
- %load = load [1 x i32], [1 x i32]* %tmp
+ %tmp = alloca [1 x i32], addrspace(5)
+ store [1 x i32] [i32 0], [1 x i32] addrspace(5)* %tmp
+ %load = load [1 x i32], [1 x i32] addrspace(5)* %tmp
store [1 x i32] %load, [1 x i32] addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdpal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdpal.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdpal.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdpal.ll Fri Feb 2 08:07:16 2018
@@ -17,14 +17,14 @@ entry:
; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:
; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]:
-define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, i32* %out) {
+define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, i32 addrspace(5)* %out) {
entry:
- %v = alloca [2 x i32]
- %vv = bitcast [2 x i32]* %v to <2 x i32>*
- store <2 x i32> %in, <2 x i32>* %vv
- %e = getelementptr [2 x i32], [2 x i32]* %v, i32 0, i32 %idx
- %x = load i32, i32* %e
- store i32 %x, i32* %out
+ %v = alloca [2 x i32], addrspace(5)
+ %vv = bitcast [2 x i32] addrspace(5)* %v to <2 x i32> addrspace(5)*
+ store <2 x i32> %in, <2 x i32> addrspace(5)* %vv
+ %e = getelementptr [2 x i32], [2 x i32] addrspace(5)* %v, i32 0, i32 %idx
+ %x = load i32, i32 addrspace(5)* %e
+ store i32 %x, i32 addrspace(5)* %out
ret void
}
@@ -41,14 +41,14 @@ entry:
; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:
; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]:
-define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, i32* %out) #0 {
+define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, i32 addrspace(5)* %out) #0 {
entry:
- %v = alloca [2 x i32]
- %vv = bitcast [2 x i32]* %v to <2 x i32>*
- store <2 x i32> %in, <2 x i32>* %vv
- %e = getelementptr [2 x i32], [2 x i32]* %v, i32 0, i32 %idx
- %x = load i32, i32* %e
- store i32 %x, i32* %out
+ %v = alloca [2 x i32], addrspace(5)
+ %vv = bitcast [2 x i32] addrspace(5)* %v to <2 x i32> addrspace(5)*
+ store <2 x i32> %in, <2 x i32> addrspace(5)* %vv
+ %e = getelementptr [2 x i32], [2 x i32] addrspace(5)* %v, i32 0, i32 %idx
+ %x = load i32, i32 addrspace(5)* %e
+ store i32 %x, i32 addrspace(5)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll Fri Feb 2 08:07:16 2018
@@ -176,57 +176,57 @@ define amdgpu_kernel void @use_kernarg_s
; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
- %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
+ store volatile i32 0, i32* %stof
ret void
}
-; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #11 {
-define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #1 {
- %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %stof
+; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 {
+define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
+ %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
+ store volatile i32 0, i32* %stof
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
-define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
+; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
store volatile i32 0, i32 addrspace(3)* %ftos
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
-define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
- store volatile i32 0, i32* %ftos
+; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
+ store volatile i32 0, i32 addrspace(5)* %ftos
ret void
}
; No-op addrspacecast should not use queue ptr
; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
- %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
+ store volatile i32 0, i32* %stof
ret void
}
; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
- %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
- %ld = load volatile i32, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(2)* %ptr to i32*
+ %ld = load volatile i32, i32* %stof
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
-define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
+; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
store volatile i32 0, i32 addrspace(1)* %ftos
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
-define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
- %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
+; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(2)*
%ld = load volatile i32, i32 addrspace(2)* %ftos
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll Fri Feb 2 08:07:16 2018
@@ -20,12 +20,12 @@ declare void @llvm.amdgcn.s.barrier() #2
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector. It currently fails because it does not know how
; to interpret:
-; getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b
+; getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 1, i32 %b
; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 64
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define amdgpu_kernel void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 {
- %alloca = alloca [16 x i32], align 16
+ %alloca = alloca [16 x i32], align 16, addrspace(5)
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0);
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
%a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid
@@ -33,11 +33,11 @@ define amdgpu_kernel void @test_private_
%a = load i32, i32 addrspace(1)* %a_ptr, !range !0
%b = load i32, i32 addrspace(1)* %b_ptr, !range !0
%result = add i32 %a, %b
- %alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b
- store i32 %result, i32* %alloca_ptr, align 4
+ %alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 1, i32 %b
+ store i32 %result, i32 addrspace(5)* %alloca_ptr, align 4
; Dummy call
call void @llvm.amdgcn.s.barrier()
- %reload = load i32, i32* %alloca_ptr, align 4, !range !0
+ %reload = load i32, i32 addrspace(5)* %alloca_ptr, align 4, !range !0
%out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll Fri Feb 2 08:07:16 2018
@@ -14,16 +14,16 @@
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:20{{$}}
; GCN-NOT: s32
-define void @void_func_byval_struct(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 {
+define void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
entry:
- %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
- %tmp = load volatile i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
+ %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
%add = add nsw i32 %tmp, 1
- store volatile i32 %add, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
- %tmp1 = load volatile i32, i32* %arrayidx2, align 4
+ store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
+ %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
%add3 = add nsw i32 %tmp1, 2
- store volatile i32 %add3, i32* %arrayidx2, align 4
+ store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
store volatile i32 9, i32 addrspace(1)* null, align 4
ret void
}
@@ -54,17 +54,17 @@ entry:
; GCN: buffer_load_dword v33,
; GCN: s_sub_u32 s32, s32, 0xb00{{$}}
; GCN: s_setpc_b64
-define void @void_func_byval_struct_non_leaf(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 {
+define void @void_func_byval_struct_non_leaf(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
entry:
- %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
- %tmp = load volatile i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
+ %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
%add = add nsw i32 %tmp, 1
- store volatile i32 %add, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
- %tmp1 = load volatile i32, i32* %arrayidx2, align 4
+ store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
+ %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
%add3 = add nsw i32 %tmp1, 2
call void @external_void_func_void()
- store volatile i32 %add3, i32* %arrayidx2, align 4
+ store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
store volatile i32 9, i32 addrspace(1)* null, align 4
ret void
}
@@ -114,19 +114,19 @@ entry:
; GCN-NEXT: s_setpc_b64
define void @call_void_func_byval_struct_func() #0 {
entry:
- %arg0 = alloca %struct.ByValStruct, align 4
- %arg1 = alloca %struct.ByValStruct, align 4
- %tmp = bitcast %struct.ByValStruct* %arg0 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp)
- %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1)
- %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
- store volatile i32 9, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
- store volatile i32 13, i32* %arrayidx2, align 4
- call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp)
+ %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
+ %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
+ store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
+ store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
+ call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
ret void
}
@@ -167,45 +167,45 @@ entry:
; GCN: s_endpgm
define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 {
entry:
- %arg0 = alloca %struct.ByValStruct, align 4
- %arg1 = alloca %struct.ByValStruct, align 4
- %tmp = bitcast %struct.ByValStruct* %arg0 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp)
- %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1)
- %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
- store volatile i32 9, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
- store volatile i32 13, i32* %arrayidx2, align 4
- call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp)
+ %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
+ %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
+ store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
+ store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
+ call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
ret void
}
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim:
define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 {
entry:
- %arg0 = alloca %struct.ByValStruct, align 4
- %arg1 = alloca %struct.ByValStruct, align 4
- %tmp = bitcast %struct.ByValStruct* %arg0 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp)
- %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8*
- call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1)
- %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
- store volatile i32 9, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
- store volatile i32 13, i32* %arrayidx2, align 4
- call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp1)
- call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp)
+ %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
+ %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
+ %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
+ store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
+ store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
+ call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
+ call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
ret void
}
declare void @external_void_func_void() #0
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #3
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #3
+declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3
+declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3
attributes #0 = { nounwind }
attributes #1 = { noinline norecurse nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll Fri Feb 2 08:07:16 2018
@@ -38,8 +38,8 @@ declare i32 @external_i32_func_i32(i32)
; Structs
declare void @external_void_func_struct_i8_i32({ i8, i32 }) #0
-declare void @external_void_func_byval_struct_i8_i32({ i8, i32 }* byval) #0
-declare void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 }* sret, { i8, i32 }* byval) #0
+declare void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval) #0
+declare void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret, { i8, i32 } addrspace(5)* byval) #0
declare void @external_void_func_v16i8(<16 x i8>) #0
@@ -465,12 +465,12 @@ define amdgpu_kernel void @test_call_ext
; GCN-NEXT: s_swappc_b64
; GCN-NOT: [[SP]]
define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
- %val = alloca { i8, i32 }, align 4
- %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %val, i32 0, i32 0
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %val, i32 0, i32 1
- store i8 3, i8* %gep0
- store i32 8, i32* %gep1
- call void @external_void_func_byval_struct_i8_i32({ i8, i32 }* %val)
+ %val = alloca { i8, i32 }, align 4, addrspace(5)
+ %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0
+ %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1
+ store i8 3, i8 addrspace(5)* %gep0
+ store i32 8, i32 addrspace(5)* %gep1
+ call void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %val)
ret void
}
@@ -497,17 +497,17 @@ define amdgpu_kernel void @test_call_ext
; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off
; GCN: buffer_store_dword [[LOAD_OUT_VAL1]], off
define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
- %in.val = alloca { i8, i32 }, align 4
- %out.val = alloca { i8, i32 }, align 4
- %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %in.val, i32 0, i32 0
- %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %in.val, i32 0, i32 1
- store i8 3, i8* %in.gep0
- store i32 8, i32* %in.gep1
- call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 }* %out.val, { i8, i32 }* %in.val)
- %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %out.val, i32 0, i32 0
- %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %out.val, i32 0, i32 1
- %out.val0 = load i8, i8* %out.gep0
- %out.val1 = load i32, i32* %out.gep1
+ %in.val = alloca { i8, i32 }, align 4, addrspace(5)
+ %out.val = alloca { i8, i32 }, align 4, addrspace(5)
+ %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0
+ %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1
+ store i8 3, i8 addrspace(5)* %in.gep0
+ store i32 8, i32 addrspace(5)* %in.gep1
+ call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %out.val, { i8, i32 } addrspace(5)* %in.val)
+ %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0
+ %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1
+ %out.val0 = load i8, i8 addrspace(5)* %out.gep0
+ %out.val1 = load i32, i32 addrspace(5)* %out.gep1
store volatile i8 %out.val0, i8 addrspace(1)* undef
store volatile i32 %out.val1, i32 addrspace(1)* undef
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll Fri Feb 2 08:07:16 2018
@@ -132,24 +132,24 @@ define amdgpu_kernel void @indirect_2_le
; GCN-LABEL: {{^}}use_stack0:
; GCN: ScratchSize: 2052
define void @use_stack0() #1 {
- %alloca = alloca [512 x i32], align 4
- call void asm sideeffect "; use $0", "v"([512 x i32]* %alloca) #0
+ %alloca = alloca [512 x i32], align 4, addrspace(5)
+ call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
ret void
}
; GCN-LABEL: {{^}}use_stack1:
; GCN: ScratchSize: 404
define void @use_stack1() #1 {
- %alloca = alloca [100 x i32], align 4
- call void asm sideeffect "; use $0", "v"([100 x i32]* %alloca) #0
+ %alloca = alloca [100 x i32], align 4, addrspace(5)
+ call void asm sideeffect "; use $0", "v"([100 x i32] addrspace(5)* %alloca) #0
ret void
}
; GCN-LABEL: {{^}}indirect_use_stack:
; GCN: ScratchSize: 2124
define void @indirect_use_stack() #1 {
- %alloca = alloca [16 x i32], align 4
- call void asm sideeffect "; use $0", "v"([16 x i32]* %alloca) #0
+ %alloca = alloca [16 x i32], align 4, addrspace(5)
+ call void asm sideeffect "; use $0", "v"([16 x i32] addrspace(5)* %alloca) #0
call void @use_stack0()
ret void
}
@@ -201,8 +201,8 @@ define amdgpu_kernel void @usage_externa
; GCN-LABEL: {{^}}direct_recursion_use_stack:
; GCN: ScratchSize: 2056
define void @direct_recursion_use_stack(i32 %val) #2 {
- %alloca = alloca [512 x i32], align 4
- call void asm sideeffect "; use $0", "v"([512 x i32]* %alloca) #0
+ %alloca = alloca [512 x i32], align 4, addrspace(5)
+ call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
%cmp = icmp eq i32 %val, 0
br i1 %cmp, label %ret, label %call
Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll Fri Feb 2 08:07:16 2018
@@ -28,8 +28,8 @@ define void @callee_no_stack_no_fp_elim(
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack() #0 {
- %alloca = alloca i32
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
ret void
}
@@ -57,8 +57,8 @@ define void @callee_with_stack() #0 {
; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_and_call() #0 {
- %alloca = alloca i32
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
call void @external_void_func_void()
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll Fri Feb 2 08:07:16 2018
@@ -43,8 +43,8 @@ define amdgpu_kernel void @kern_indirect
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
define void @use_queue_ptr_addrspacecast() #1 {
- %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %asc
+ %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
+ store volatile i32 0, i32* %asc
ret void
}
@@ -113,8 +113,8 @@ define void @use_workgroup_id_x() #1 {
; GCN: ; use s6
; GCN: s_setpc_b64
define void @use_stack_workgroup_id_x() #1 {
- %alloca = alloca i32
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
%val = call i32 @llvm.amdgcn.workgroup.id.x()
call void asm sideeffect "; use $0", "s"(i32 %val)
ret void
@@ -432,8 +432,8 @@ define amdgpu_kernel void @kern_indirect
; GCN: ; use s15
; GCN: ; use s16
define void @use_every_sgpr_input() #1 {
- %alloca = alloca i32, align 4
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
@@ -512,8 +512,8 @@ define void @func_indirect_use_every_sgp
; GCN-DAG: s_mov_b32 s8, s16
; GCN: s_swappc_b64
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
- %alloca = alloca i32, align 4
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
@@ -568,10 +568,10 @@ define void @func_use_every_sgpr_input_c
; GCN: ; use [[SAVE_Y]]
; GCN: ; use [[SAVE_Z]]
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
- %alloca = alloca i32, align 4
+ %alloca = alloca i32, align 4, addrspace(5)
call void @use_workgroup_id_xyz()
- store volatile i32 0, i32* %alloca
+ store volatile i32 0, i32 addrspace(5)* %alloca
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll Fri Feb 2 08:07:16 2018
@@ -368,7 +368,7 @@ define void @too_many_args_use_workitem_
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32* byval %arg32) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32 addrspace(5)* byval %arg32) #1 {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, i32 addrspace(1)* undef
@@ -407,7 +407,7 @@ define void @too_many_args_use_workitem_
store volatile i32 %arg29, i32 addrspace(1)* undef
store volatile i32 %arg30, i32 addrspace(1)* undef
store volatile i32 %arg31, i32 addrspace(1)* undef
- %private = load volatile i32, i32* %arg32
+ %private = load volatile i32, i32 addrspace(5)* %arg32
ret void
}
@@ -435,8 +435,8 @@ define void @too_many_args_use_workitem_
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
- %alloca = alloca i32, align 4
- store volatile i32 999, i32* %alloca
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 999, i32 addrspace(5)* %alloca
call void @too_many_args_use_workitem_id_x_byval(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -446,7 +446,7 @@ define amdgpu_kernel void @kern_call_too
i32 210, i32 220, i32 230, i32 240,
i32 250, i32 260, i32 270, i32 280,
i32 290, i32 300, i32 310, i32 320,
- i32* %alloca)
+ i32 addrspace(5)* %alloca)
ret void
}
@@ -460,8 +460,8 @@ define amdgpu_kernel void @kern_call_too
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
- %alloca = alloca i32, align 4
- store volatile i32 999, i32* %alloca
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 999, i32 addrspace(5)* %alloca
call void @too_many_args_use_workitem_id_x_byval(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -471,7 +471,7 @@ define void @func_call_too_many_args_use
i32 210, i32 220, i32 230, i32 240,
i32 250, i32 260, i32 270, i32 280,
i32 290, i32 300, i32 310, i32 320,
- i32* %alloca)
+ i32 addrspace(5)* %alloca)
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll Fri Feb 2 08:07:16 2018
@@ -5,10 +5,10 @@
; GCN: buffer_store_dword [[FI]]
define amdgpu_kernel void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
- %b = alloca i8
- call void @llvm.lifetime.start.p0i8(i64 1, i8* %b)
- store volatile i8* %b, i8* addrspace(1)* undef
- call void @llvm.lifetime.end.p0i8(i64 1, i8* %b)
+ %b = alloca i8, addrspace(5)
+ call void @llvm.lifetime.start.p5i8(i64 1, i8 addrspace(5)* %b)
+ store volatile i8 addrspace(5)* %b, i8 addrspace(5)* addrspace(1)* undef
+ call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* %b)
ret void
}
@@ -18,10 +18,10 @@ entry:
; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 4{{$}}
; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
-define amdgpu_kernel void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
- %tmp = alloca float
- store float 4.0, float *%tmp
- store float* %tmp, float* addrspace(3)* %ptr
+define amdgpu_kernel void @stored_fi_to_lds(float addrspace(5)* addrspace(3)* %ptr) #0 {
+ %tmp = alloca float, addrspace(5)
+ store float 4.0, float addrspace(5)*%tmp
+ store float addrspace(5)* %tmp, float addrspace(5)* addrspace(3)* %ptr
ret void
}
@@ -38,13 +38,13 @@ define amdgpu_kernel void @stored_fi_to_
; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 8{{$}}
; GCN: ds_write_b32 [[VLDSPTR]], [[FI1]]
-define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 {
- %tmp0 = alloca float
- %tmp1 = alloca float
- store float 4.0, float* %tmp0
- store float 4.0, float* %tmp1
- store volatile float* %tmp0, float* addrspace(3)* %ptr
- store volatile float* %tmp1, float* addrspace(3)* %ptr
+define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(float addrspace(5)* addrspace(3)* %ptr) #0 {
+ %tmp0 = alloca float, addrspace(5)
+ %tmp1 = alloca float, addrspace(5)
+ store float 4.0, float addrspace(5)* %tmp0
+ store float 4.0, float addrspace(5)* %tmp1
+ store volatile float addrspace(5)* %tmp0, float addrspace(5)* addrspace(3)* %ptr
+ store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(3)* %ptr
ret void
}
@@ -55,12 +55,12 @@ define amdgpu_kernel void @stored_fi_to_
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 4{{$}}
; GCN: buffer_store_dword [[ZERO]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
define amdgpu_kernel void @stored_fi_to_self() #0 {
- %tmp = alloca i32*
+ %tmp = alloca i32 addrspace(5)*, addrspace(5)
; Avoid optimizing everything out
- store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp
- %bitcast = bitcast i32** %tmp to i32*
- store volatile i32* %bitcast, i32** %tmp
+ store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp
+ %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp to i32 addrspace(5)*
+ store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp
ret void
}
@@ -74,17 +74,17 @@ define amdgpu_kernel void @stored_fi_to_
; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x804{{$}}
; GCN: buffer_store_dword [[OFFSETK]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2052{{$}}
define amdgpu_kernel void @stored_fi_to_self_offset() #0 {
- %tmp0 = alloca [512 x i32]
- %tmp1 = alloca i32*
+ %tmp0 = alloca [512 x i32], addrspace(5)
+ %tmp1 = alloca i32 addrspace(5)*, addrspace(5)
; Avoid optimizing everything out
- %tmp0.cast = bitcast [512 x i32]* %tmp0 to i32*
- store volatile i32 32, i32* %tmp0.cast
+ %tmp0.cast = bitcast [512 x i32] addrspace(5)* %tmp0 to i32 addrspace(5)*
+ store volatile i32 32, i32 addrspace(5)* %tmp0.cast
- store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp1
+ store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp1
- %bitcast = bitcast i32** %tmp1 to i32*
- store volatile i32* %bitcast, i32** %tmp1
+ %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp1 to i32 addrspace(5)*
+ store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp1
ret void
}
@@ -99,18 +99,18 @@ define amdgpu_kernel void @stored_fi_to_
; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}}
; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:8{{$}}
define amdgpu_kernel void @stored_fi_to_fi() #0 {
- %tmp0 = alloca i32*
- %tmp1 = alloca i32*
- %tmp2 = alloca i32*
- store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp0
- store volatile i32* inttoptr (i32 5678 to i32*), i32** %tmp1
- store volatile i32* inttoptr (i32 9999 to i32*), i32** %tmp2
+ %tmp0 = alloca i32 addrspace(5)*, addrspace(5)
+ %tmp1 = alloca i32 addrspace(5)*, addrspace(5)
+ %tmp2 = alloca i32 addrspace(5)*, addrspace(5)
+ store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp0
+ store volatile i32 addrspace(5)* inttoptr (i32 5678 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp1
+ store volatile i32 addrspace(5)* inttoptr (i32 9999 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp2
- %bitcast1 = bitcast i32** %tmp1 to i32*
- %bitcast2 = bitcast i32** %tmp2 to i32* ; at offset 8
+ %bitcast1 = bitcast i32 addrspace(5)* addrspace(5)* %tmp1 to i32 addrspace(5)*
+ %bitcast2 = bitcast i32 addrspace(5)* addrspace(5)* %tmp2 to i32 addrspace(5)* ; at offset 8
- store volatile i32* %bitcast1, i32** %tmp2 ; store offset 4 at offset 8
- store volatile i32* %bitcast2, i32** %tmp1 ; store offset 8 at offset 4
+ store volatile i32 addrspace(5)* %bitcast1, i32 addrspace(5)* addrspace(5)* %tmp2 ; store offset 4 at offset 8
+ store volatile i32 addrspace(5)* %bitcast2, i32 addrspace(5)* addrspace(5)* %tmp1 ; store offset 8 at offset 4
ret void
}
@@ -118,10 +118,10 @@ define amdgpu_kernel void @stored_fi_to_
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
; GCN: buffer_store_dword [[FI]]
-define amdgpu_kernel void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {
- %tmp = alloca float
- store float 0.0, float *%tmp
- store float* %tmp, float* addrspace(1)* %ptr
+define amdgpu_kernel void @stored_fi_to_global(float addrspace(5)* addrspace(1)* %ptr) #0 {
+ %tmp = alloca float, addrspace(5)
+ store float 0.0, float addrspace(5)*%tmp
+ store float addrspace(5)* %tmp, float addrspace(5)* addrspace(1)* %ptr
ret void
}
@@ -136,15 +136,15 @@ define amdgpu_kernel void @stored_fi_to_
; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}}
; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define amdgpu_kernel void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
- %tmp0 = alloca float
- %tmp1 = alloca float
- %tmp2 = alloca float
- store volatile float 0.0, float *%tmp0
- store volatile float 0.0, float *%tmp1
- store volatile float 0.0, float *%tmp2
- store volatile float* %tmp1, float* addrspace(1)* %ptr
- store volatile float* %tmp2, float* addrspace(1)* %ptr
+define amdgpu_kernel void @stored_fi_to_global_2_small_objects(float addrspace(5)* addrspace(1)* %ptr) #0 {
+ %tmp0 = alloca float, addrspace(5)
+ %tmp1 = alloca float, addrspace(5)
+ %tmp2 = alloca float, addrspace(5)
+ store volatile float 0.0, float addrspace(5)*%tmp0
+ store volatile float 0.0, float addrspace(5)*%tmp1
+ store volatile float 0.0, float addrspace(5)*%tmp2
+ store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(1)* %ptr
+ store volatile float addrspace(5)* %tmp2, float addrspace(5)* addrspace(1)* %ptr
ret void
}
@@ -163,19 +163,19 @@ define amdgpu_kernel void @stored_fi_to_
; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
- %tmp0 = alloca [4096 x i32]
- %tmp1 = alloca [4096 x i32]
- %gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 0
- store volatile i32 0, i32* %gep0.tmp0
- %gep1.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 4095
- store volatile i32 999, i32* %gep1.tmp0
- %gep0.tmp1 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 14
- store i32* %gep0.tmp1, i32* addrspace(1)* %ptr
+define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(i32 addrspace(5)* addrspace(1)* %ptr) #0 {
+ %tmp0 = alloca [4096 x i32], addrspace(5)
+ %tmp1 = alloca [4096 x i32], addrspace(5)
+ %gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 0
+ store volatile i32 0, i32 addrspace(5)* %gep0.tmp0
+ %gep1.tmp0 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 4095
+ store volatile i32 999, i32 addrspace(5)* %gep1.tmp0
+ %gep0.tmp1 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 14
+ store i32 addrspace(5)* %gep0.tmp1, i32 addrspace(5)* addrspace(1)* %ptr
ret void
}
-@g1 = external addrspace(1) global i32*
+@g1 = external addrspace(1) global i32 addrspace(5)*
; This was leaving a dead node around resulting in failing to select
; on the leftover AssertZext's ValueType operand.
@@ -188,16 +188,16 @@ define amdgpu_kernel void @stored_fi_to_
; GCN: buffer_store_dword [[FI]]
define amdgpu_kernel void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 {
entry:
- %b = alloca i32, align 4
- %tmp1 = load volatile i32*, i32* addrspace(1)* @g1, align 4
- %arrayidx = getelementptr inbounds i32, i32* %tmp1, i32 %idx
- %tmp2 = load i32, i32* %arrayidx, align 4
- store volatile i32* %b, i32* addrspace(1)* undef
+ %b = alloca i32, align 4, addrspace(5)
+ %tmp1 = load volatile i32 addrspace(5)*, i32 addrspace(5)* addrspace(1)* @g1, align 4
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %tmp1, i32 %idx
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx, align 4
+ store volatile i32 addrspace(5)* %b, i32 addrspace(5)* addrspace(1)* undef
ret void
}
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1
+declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll Fri Feb 2 08:07:16 2018
@@ -6,32 +6,32 @@
; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
-; OPT-CIVI: getelementptr i32, i32 addrspace(4)* %in
+; OPT-CIVI: getelementptr i32, i32* %in
; OPT-CIVI: br i1
; OPT-CIVI-NOT: ptrtoint
; OPT-GFX9: br
-; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %0, i64 28
-; OPT-GFX9: %1 = bitcast i8 addrspace(4)* %sunkaddr to i32 addrspace(4)*
-; OPT-GFX9: load i32, i32 addrspace(4)* %1
+; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %0, i64 28
+; OPT-GFX9: %1 = bitcast i8* %sunkaddr to i32*
+; OPT-GFX9: load i32, i32* %1
; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
; GCN: flat_load_dword
; GCN: {{^}}BB0_2:
-define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
+define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32* %out, i32* %in, i32 %cond) {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
+ %out.gep = getelementptr i32, i32* %out, i64 999999
+ %in.gep = getelementptr i32, i32* %in, i64 7
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i32, i32 addrspace(4)* %in.gep
+ %tmp1 = load i32, i32* %in.gep
br label %endif
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
@@ -39,7 +39,7 @@ done:
}
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
-; OPT: getelementptr i32, i32 addrspace(4)* %out,
+; OPT: getelementptr i32, i32* %out,
; rOPT-CI-NOT: getelementptr
; OPT: br i1
@@ -50,11 +50,11 @@ done:
; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32:
; CI: buffer_load_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
-define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
+define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32* %out, i32* %in, i32 %cond) {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
- %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(1)*
+ %out.gep = getelementptr i32, i32* %out, i64 999999
+ %in.gep = getelementptr i32, i32* %in, i64 7
+ %cast = addrspacecast i32* %in.gep to i32 addrspace(1)*
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %endif, label %if
@@ -64,7 +64,7 @@ if:
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
@@ -72,7 +72,7 @@ done:
}
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
-; OPT: getelementptr i32, i32 addrspace(4)* %out,
+; OPT: getelementptr i32, i32* %out,
; OPT-CI-NOT: getelementptr
; OPT: br i1
@@ -83,11 +83,11 @@ done:
; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_constant_i32:
; CI: s_load_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
+define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32* %out, i32* %in, i32 %cond) {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
- %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(2)*
+ %out.gep = getelementptr i32, i32* %out, i64 999999
+ %in.gep = getelementptr i32, i32* %in, i64 7
+ %cast = addrspacecast i32* %in.gep to i32 addrspace(2)*
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %endif, label %if
@@ -97,7 +97,7 @@ if:
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
@@ -105,34 +105,34 @@ done:
}
; OPT-LABEL: @test_sink_flat_small_max_flat_offset(
-; OPT-CIVI: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095
+; OPT-CIVI: %in.gep = getelementptr i8, i8* %in, i64 4095
; OPT-CIVI: br
; OPT-CIVI-NOT: getelementptr
-; OPT-CIVI: load i8, i8 addrspace(4)* %in.gep
+; OPT-CIVI: load i8, i8* %in.gep
; OPT-GFX9: br
-; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %in, i64 4095
-; OPT-GFX9: load i8, i8 addrspace(4)* %sunkaddr
+; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %in, i64 4095
+; OPT-GFX9: load i8, i8* %sunkaddr
; GCN-LABEL: {{^}}test_sink_flat_small_max_flat_offset:
; GFX9: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
; CIVI: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 {
+define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32* %out, i8* %in) #1 {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
- %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095
+ %out.gep = getelementptr i32, i32* %out, i32 1024
+ %in.gep = getelementptr i8, i8* %in, i64 4095
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i8, i8 addrspace(4)* %in.gep
+ %tmp1 = load i8, i8* %in.gep
%tmp2 = sext i8 %tmp1 to i32
br label %endif
endif:
%x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
@@ -140,29 +140,29 @@ done:
}
; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
-; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096
+; OPT: %in.gep = getelementptr i8, i8* %in, i64 4096
; OPT: br
; OPT-NOT: getelementptr
-; OPT: load i8, i8 addrspace(4)* %in.gep
+; OPT: load i8, i8* %in.gep
; GCN-LABEL: {{^}}test_sink_flat_small_max_plus_1_flat_offset:
; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 {
+define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32* %out, i8* %in) #1 {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 99999
- %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096
+ %out.gep = getelementptr i32, i32* %out, i64 99999
+ %in.gep = getelementptr i8, i8* %in, i64 4096
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i8, i8 addrspace(4)* %in.gep
+ %tmp1 = load i8, i8* %in.gep
%tmp2 = sext i8 %tmp1 to i32
br label %endif
endif:
%x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
@@ -170,30 +170,30 @@ done:
}
; OPT-LABEL: @test_no_sink_flat_reg_offset(
-; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg
+; OPT: %in.gep = getelementptr i8, i8* %in, i64 %reg
; OPT: br
; OPT-NOT: getelementptr
-; OPT: load i8, i8 addrspace(4)* %in.gep
+; OPT: load i8, i8* %in.gep
; GCN-LABEL: {{^}}test_no_sink_flat_reg_offset:
; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in, i64 %reg) #1 {
+define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32* %out, i8* %in, i64 %reg) #1 {
entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
- %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg
+ %out.gep = getelementptr i32, i32* %out, i32 1024
+ %in.gep = getelementptr i8, i8* %in, i64 %reg
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i8, i8 addrspace(4)* %in.gep
+ %tmp1 = load i8, i8* %in.gep
%tmp2 = sext i8 %tmp1 to i32
br label %endif
endif:
%x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
+ store i32 %x, i32* %out.gep
br label %done
done:
Modified: llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll Fri Feb 2 08:07:16 2018
@@ -7,7 +7,7 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-scalarize-global-loads=false -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SICIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
@@ -137,24 +137,24 @@ done:
; GCN: {{^}}BB4_2:
define amdgpu_kernel void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
- %alloca = alloca [512 x i32], align 4
+ %alloca = alloca [512 x i32], align 4, addrspace(5)
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%add.arg = add i32 %arg, 8
- %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1022
+ %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1022
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- store volatile i32 123, i32* %alloca.gep
- %tmp1 = load volatile i32, i32* %alloca.gep
+ store volatile i32 123, i32 addrspace(5)* %alloca.gep
+ %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep
br label %endif
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
store i32 %x, i32 addrspace(1)* %out.gep.0
- %load = load volatile i32, i32* %alloca.gep
+ %load = load volatile i32, i32 addrspace(5)* %alloca.gep
store i32 %load, i32 addrspace(1)* %out.gep.1
br label %done
@@ -178,24 +178,24 @@ done:
define amdgpu_kernel void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
- %alloca = alloca [512 x i32], align 4
+ %alloca = alloca [512 x i32], align 4, addrspace(5)
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%add.arg = add i32 %arg, 8
- %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
+ %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1023
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- store volatile i32 123, i32* %alloca.gep
- %tmp1 = load volatile i32, i32* %alloca.gep
+ store volatile i32 123, i32 addrspace(5)* %alloca.gep
+ %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep
br label %endif
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
store i32 %x, i32 addrspace(1)* %out.gep.0
- %load = load volatile i32, i32* %alloca.gep
+ %load = load volatile i32, i32 addrspace(5)* %alloca.gep
store i32 %load, i32 addrspace(1)* %out.gep.1
br label %done
@@ -204,7 +204,7 @@ done:
}
; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
-; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
+; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1024
; OPT: br i1
; OPT-NOT: ptrtoint
@@ -215,24 +215,24 @@ done:
; GCN: {{^BB[0-9]+}}_2:
define amdgpu_kernel void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
- %alloca = alloca [512 x i32], align 4
+ %alloca = alloca [512 x i32], align 4, addrspace(5)
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%add.arg = add i32 %arg, 8
- %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
+ %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1024
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- store volatile i32 123, i32* %alloca.gep
- %tmp1 = load volatile i32, i32* %alloca.gep
+ store volatile i32 123, i32 addrspace(5)* %alloca.gep
+ %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep
br label %endif
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
store i32 %x, i32 addrspace(1)* %out.gep.0
- %load = load volatile i32, i32* %alloca.gep
+ %load = load volatile i32, i32 addrspace(5)* %alloca.gep
store i32 %load, i32 addrspace(1)* %out.gep.1
br label %done
Modified: llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll Fri Feb 2 08:07:16 2018
@@ -237,7 +237,7 @@ bb2:
br i1 %tmp3, label %bb4, label %bb10
bb4: ; preds = %bb2
- %tmp6 = load float, float* undef
+ %tmp6 = load float, float addrspace(5)* undef
%tmp7 = fcmp olt float %tmp6, 0.0
br i1 %tmp7, label %bb8, label %Flow
@@ -257,7 +257,7 @@ Flow1:
br label %bb1
bb12: ; preds = %bb10
- store volatile <4 x float> %tmp11, <4 x float>* undef, align 16
+ store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/commute-compares.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/commute-compares.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/commute-compares.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/commute-compares.ll Fri Feb 2 08:07:16 2018
@@ -703,9 +703,9 @@ define amdgpu_kernel void @commute_uno_2
; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]]
define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 {
entry:
- %stack0 = alloca i32
- %ptr0 = load volatile i32*, i32* addrspace(1)* undef
- %eq = icmp eq i32* %ptr0, %stack0
+ %stack0 = alloca i32, addrspace(5)
+ %ptr0 = load volatile i32 addrspace(5)*, i32 addrspace(5)* addrspace(1)* undef
+ %eq = icmp eq i32 addrspace(5)* %ptr0, %stack0
%ext = zext i1 %eq to i32
store volatile i32 %ext, i32 addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/copy-to-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/copy-to-reg.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/copy-to-reg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/copy-to-reg.ll Fri Feb 2 08:07:16 2018
@@ -8,20 +8,20 @@
; CHECK-LABEL: {{^}}copy_to_reg_frameindex:
define amdgpu_kernel void @copy_to_reg_frameindex(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
- %alloca = alloca [16 x i32]
+ %alloca = alloca [16 x i32], addrspace(5)
br label %loop
loop:
%inc = phi i32 [0, %entry], [%inc.i, %loop]
- %ptr = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %inc
- store i32 %inc, i32* %ptr
+ %ptr = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %inc
+ store i32 %inc, i32 addrspace(5)* %ptr
%inc.i = add i32 %inc, 1
%cnd = icmp uge i32 %inc.i, 16
br i1 %cnd, label %done, label %loop
done:
- %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 0
- %tmp1 = load i32, i32* %tmp0
+ %tmp0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0
+ %tmp1 = load i32, i32 addrspace(5)* %tmp0
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll Fri Feb 2 08:07:16 2018
@@ -5,8 +5,8 @@
; SI: buffer_load_sbyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
define amdgpu_kernel void @load_i8_sext_private(i32 addrspace(1)* %out) {
entry:
- %tmp0 = alloca i8
- %tmp1 = load i8, i8* %tmp0
+ %tmp0 = alloca i8, addrspace(5)
+ %tmp1 = load i8, i8 addrspace(5)* %tmp0
%tmp2 = sext i8 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -16,8 +16,8 @@ entry:
; SI: buffer_load_ubyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
define amdgpu_kernel void @load_i8_zext_private(i32 addrspace(1)* %out) {
entry:
- %tmp0 = alloca i8
- %tmp1 = load i8, i8* %tmp0
+ %tmp0 = alloca i8, addrspace(5)
+ %tmp1 = load i8, i8 addrspace(5)* %tmp0
%tmp2 = zext i8 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -27,8 +27,8 @@ entry:
; SI: buffer_load_sshort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
define amdgpu_kernel void @load_i16_sext_private(i32 addrspace(1)* %out) {
entry:
- %tmp0 = alloca i16
- %tmp1 = load i16, i16* %tmp0
+ %tmp0 = alloca i16, addrspace(5)
+ %tmp1 = load i16, i16 addrspace(5)* %tmp0
%tmp2 = sext i16 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -38,8 +38,8 @@ entry:
; SI: buffer_load_ushort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
define amdgpu_kernel void @load_i16_zext_private(i32 addrspace(1)* %out) {
entry:
- %tmp0 = alloca i16
- %tmp1 = load volatile i16, i16* %tmp0
+ %tmp0 = alloca i16, addrspace(5)
+ %tmp1 = load volatile i16, i16 addrspace(5)* %tmp0
%tmp2 = zext i16 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll Fri Feb 2 08:07:16 2018
@@ -19,42 +19,42 @@
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]]
define amdgpu_kernel void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
- %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
- store volatile i32 %x, i32 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i32 addrspace(1)* %gptr to i32*
+ store volatile i32 %x, i32* %fptr, align 4
ret void
}
; CHECK-LABEL: {{^}}store_flat_i64:
; CHECK: flat_store_dwordx2
define amdgpu_kernel void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
- %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
- store volatile i64 %x, i64 addrspace(4)* %fptr, align 8
+ %fptr = addrspacecast i64 addrspace(1)* %gptr to i64*
+ store volatile i64 %x, i64* %fptr, align 8
ret void
}
; CHECK-LABEL: {{^}}store_flat_v4i32:
; CHECK: flat_store_dwordx4
define amdgpu_kernel void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
- %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
- store volatile <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
+ %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32>*
+ store volatile <4 x i32> %x, <4 x i32>* %fptr, align 16
ret void
}
; CHECK-LABEL: {{^}}store_flat_trunc_i16:
; CHECK: flat_store_short
define amdgpu_kernel void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
- %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
+ %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
%y = trunc i32 %x to i16
- store volatile i16 %y, i16 addrspace(4)* %fptr, align 2
+ store volatile i16 %y, i16* %fptr, align 2
ret void
}
; CHECK-LABEL: {{^}}store_flat_trunc_i8:
; CHECK: flat_store_byte
define amdgpu_kernel void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
- %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
+ %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
%y = trunc i32 %x to i8
- store volatile i8 %y, i8 addrspace(4)* %fptr, align 2
+ store volatile i8 %y, i8* %fptr, align 2
ret void
}
@@ -63,8 +63,8 @@ define amdgpu_kernel void @store_flat_tr
; CHECK-LABEL: load_flat_i32:
; CHECK: flat_load_dword
define amdgpu_kernel void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
- %fload = load volatile i32, i32 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i32 addrspace(1)* %gptr to i32*
+ %fload = load volatile i32, i32* %fptr, align 4
store i32 %fload, i32 addrspace(1)* %out, align 4
ret void
}
@@ -72,8 +72,8 @@ define amdgpu_kernel void @load_flat_i32
; CHECK-LABEL: load_flat_i64:
; CHECK: flat_load_dwordx2
define amdgpu_kernel void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
- %fload = load volatile i64, i64 addrspace(4)* %fptr, align 8
+ %fptr = addrspacecast i64 addrspace(1)* %gptr to i64*
+ %fload = load volatile i64, i64* %fptr, align 8
store i64 %fload, i64 addrspace(1)* %out, align 8
ret void
}
@@ -81,8 +81,8 @@ define amdgpu_kernel void @load_flat_i64
; CHECK-LABEL: load_flat_v4i32:
; CHECK: flat_load_dwordx4
define amdgpu_kernel void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
- %fload = load volatile <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 32
+ %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32>*
+ %fload = load volatile <4 x i32>, <4 x i32>* %fptr, align 32
store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
ret void
}
@@ -90,8 +90,8 @@ define amdgpu_kernel void @load_flat_v4i
; CHECK-LABEL: sextload_flat_i8:
; CHECK: flat_load_sbyte
define amdgpu_kernel void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
- %fload = load volatile i8, i8 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
+ %fload = load volatile i8, i8* %fptr, align 4
%ext = sext i8 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -100,8 +100,8 @@ define amdgpu_kernel void @sextload_flat
; CHECK-LABEL: zextload_flat_i8:
; CHECK: flat_load_ubyte
define amdgpu_kernel void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
- %fload = load volatile i8, i8 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
+ %fload = load volatile i8, i8* %fptr, align 4
%ext = zext i8 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -110,8 +110,8 @@ define amdgpu_kernel void @zextload_flat
; CHECK-LABEL: sextload_flat_i16:
; CHECK: flat_load_sshort
define amdgpu_kernel void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
- %fload = load volatile i16, i16 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
+ %fload = load volatile i16, i16* %fptr, align 4
%ext = sext i16 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -120,8 +120,8 @@ define amdgpu_kernel void @sextload_flat
; CHECK-LABEL: zextload_flat_i16:
; CHECK: flat_load_ushort
define amdgpu_kernel void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
- %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
- %fload = load volatile i16, i16 addrspace(4)* %fptr, align 4
+ %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
+ %fload = load volatile i16, i16* %fptr, align 4
%ext = zext i16 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -133,9 +133,9 @@ define amdgpu_kernel void @zextload_flat
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
define amdgpu_kernel void @flat_scratch_unaligned_load() {
- %scratch = alloca i32
- %fptr = addrspacecast i32* %scratch to i32 addrspace(4)*
- %ld = load volatile i32, i32 addrspace(4)* %fptr, align 1
+ %scratch = alloca i32, addrspace(5)
+ %fptr = addrspacecast i32 addrspace(5)* %scratch to i32*
+ %ld = load volatile i32, i32* %fptr, align 1
ret void
}
@@ -145,9 +145,9 @@ define amdgpu_kernel void @flat_scratch_
; CHECK: flat_store_byte
; CHECK: flat_store_byte
define amdgpu_kernel void @flat_scratch_unaligned_store() {
- %scratch = alloca i32
- %fptr = addrspacecast i32* %scratch to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %fptr, align 1
+ %scratch = alloca i32, addrspace(5)
+ %fptr = addrspacecast i32 addrspace(5)* %scratch to i32*
+ store volatile i32 0, i32* %fptr, align 1
ret void
}
@@ -156,9 +156,9 @@ define amdgpu_kernel void @flat_scratch_
; HSA: flat_load_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_load() {
- %scratch = alloca <2 x i32>
- %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
- %ld = load volatile <2 x i32>, <2 x i32> addrspace(4)* %fptr
+ %scratch = alloca <2 x i32>, addrspace(5)
+ %fptr = addrspacecast <2 x i32> addrspace(5)* %scratch to <2 x i32>*
+ %ld = load volatile <2 x i32>, <2 x i32>* %fptr
ret void
}
@@ -167,59 +167,59 @@ define amdgpu_kernel void @flat_scratch_
; HSA: flat_store_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_store() {
- %scratch = alloca <2 x i32>
- %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
- store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* %fptr
+ %scratch = alloca <2 x i32>, addrspace(5)
+ %fptr = addrspacecast <2 x i32> addrspace(5)* %scratch to <2 x i32>*
+ store volatile <2 x i32> zeroinitializer, <2 x i32>* %fptr
ret void
}
; CHECK-LABEL: {{^}}store_flat_i8_max_offset:
; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4095{{$}}
-define amdgpu_kernel void @store_flat_i8_max_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
- store volatile i8 %x, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @store_flat_i8_max_offset(i8* %fptr, i8 %x) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4095
+ store volatile i8 %x, i8* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}store_flat_i8_max_offset_p1:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr, i8 %x) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
- store volatile i8 %x, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8* %fptr, i8 %x) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4096
+ store volatile i8 %x, i8* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}store_flat_i8_neg_offset:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @store_flat_i8_neg_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
- store volatile i8 %x, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @store_flat_i8_neg_offset(i8* %fptr, i8 %x) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2
+ store volatile i8 %x, i8* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}load_flat_i8_max_offset:
; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
-define amdgpu_kernel void @load_flat_i8_max_offset(i8 addrspace(4)* %fptr) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
- %val = load volatile i8, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @load_flat_i8_max_offset(i8* %fptr) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4095
+ %val = load volatile i8, i8* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}load_flat_i8_max_offset_p1:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
-define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
- %val = load volatile i8, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8* %fptr) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4096
+ %val = load volatile i8, i8* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}load_flat_i8_neg_offset:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
-define amdgpu_kernel void @load_flat_i8_neg_offset(i8 addrspace(4)* %fptr) #0 {
- %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
- %val = load volatile i8, i8 addrspace(4)* %fptr.offset
+define amdgpu_kernel void @load_flat_i8_neg_offset(i8* %fptr) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2
+ %val = load volatile i8, i8* %fptr.offset
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll Fri Feb 2 08:07:16 2018
@@ -38,15 +38,15 @@ entry:
; NOHSA-NOADDR64: flat_store_dword
define amdgpu_kernel void @test_addr64(i32 addrspace(1)* %out) {
entry:
- %out.addr = alloca i32 addrspace(1)*, align 4
+ %out.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
- store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
- %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+ store i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(5)* %out.addr, align 4
+ %ld0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %out.addr, align 4
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
store i32 1, i32 addrspace(1)* %arrayidx, align 4
- %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+ %ld1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %out.addr, align 4
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
store i32 2, i32 addrspace(1)* %arrayidx1, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll Fri Feb 2 08:07:16 2018
@@ -5,29 +5,29 @@
; GCN-LABEL: {{^}}atomic_add_i32_offset:
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
-define amdgpu_kernel void @atomic_add_i32_max_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_max_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 1023
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 1023
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 1024
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
ret void
}
@@ -35,22 +35,22 @@ entry:
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
ret void
}
@@ -58,60 +58,60 @@ entry:
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32:
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_add_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile add i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_ret:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile add i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_addr64:
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile add i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile add i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32_offset:
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
ret void
}
@@ -119,22 +119,22 @@ entry:
; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
ret void
}
@@ -142,60 +142,60 @@ entry:
; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_and_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile and i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32_ret:
; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile and i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32_addr64:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile and i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile and i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32_offset:
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
ret void
}
@@ -203,22 +203,22 @@ entry:
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
ret void
}
@@ -226,60 +226,60 @@ entry:
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile sub i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32_ret:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile sub i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile sub i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile sub i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32_offset:
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
ret void
}
@@ -287,22 +287,22 @@ entry:
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
ret void
}
@@ -310,60 +310,60 @@ entry:
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_max_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile max i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32_ret:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile max i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32_addr64:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile max i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile max i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32_offset:
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile umax i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
ret void
}
@@ -371,22 +371,22 @@ entry:
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile umax i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile umax i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
ret void
}
@@ -394,60 +394,60 @@ entry:
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile umax i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile umax i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32_ret:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile umax i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile umax i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile umax i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32_offset:
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
ret void
}
@@ -455,22 +455,22 @@ entry:
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
ret void
}
@@ -478,60 +478,60 @@ entry:
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_min_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile min i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32_ret:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile min i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32_addr64:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile min i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile min i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32_offset:
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
ret void
}
@@ -539,22 +539,22 @@ entry:
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
ret void
}
@@ -562,60 +562,60 @@ entry:
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile umin i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32_ret:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile umin i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile umin i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]{{$}}
- define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+ define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile umin i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32_offset:
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
ret void
}
@@ -623,22 +623,22 @@ entry:
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
ret void
}
@@ -646,60 +646,60 @@ entry:
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_or_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile or i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32_ret:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile or i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32_addr64:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile or i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile or i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
ret void
}
@@ -707,22 +707,22 @@ entry:
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
ret void
}
@@ -730,50 +730,50 @@ entry:
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32:
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile xchg i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile xchg i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
@@ -782,10 +782,10 @@ entry:
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
ret void
}
@@ -793,23 +793,23 @@ entry:
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
-define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
- store i32 %flag, i32 addrspace(4)* %out2
+ store i32 %flag, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index, i32 %old) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
ret void
}
@@ -817,63 +817,63 @@ entry:
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
-define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
- store i32 %flag, i32 addrspace(4)* %out2
+ store i32 %flag, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
; GCN: flat_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32(i32* %out, i32 %in, i32 %old) {
entry:
- %val = cmpxchg volatile i32 addrspace(4)* %out, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
-define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32* %out, i32* %out2, i32 %in, i32 %old) {
entry:
- %val = cmpxchg volatile i32 addrspace(4)* %out, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
- store i32 %flag, i32 addrspace(4)* %out2
+ store i32 %flag, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
; GCN: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32* %out, i32 %in, i64 %index, i32 %old) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = cmpxchg volatile i32 addrspace(4)* %ptr, i32 %old, i32 %in seq_cst seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
-define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = cmpxchg volatile i32 addrspace(4)* %ptr, i32 %old, i32 %in seq_cst seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst
%flag = extractvalue { i32, i1 } %val, 0
- store i32 %flag, i32 addrspace(4)* %out2
+ store i32 %flag, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32_offset:
; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32_offset(i32* %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
ret void
}
@@ -881,22 +881,22 @@ entry:
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
ret void
}
@@ -904,50 +904,50 @@ entry:
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32:
; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(4)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32(i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xor i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32_ret:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
- %val = atomicrmw volatile xor i32 addrspace(4)* %out, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
-define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile xor i32 addrspace(4)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %val = atomicrmw volatile xor i32 addrspace(4)* %ptr, i32 %in seq_cst
- store i32 %val, i32 addrspace(4)* %out2
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst
+ store i32 %val, i32* %out2
ret void
}
@@ -955,21 +955,21 @@ entry:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_load_i32_offset(i32* %in, i32* %out) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %in, i32 4
- %val = load atomic i32, i32 addrspace(4)* %gep seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %gep = getelementptr i32, i32* %in, i32 4
+ %val = load atomic i32, i32* %gep seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}atomic_load_i32:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i32(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_load_i32(i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -977,60 +977,60 @@ entry:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32* %in, i32* %out, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %in, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %val = load atomic i32, i32 addrspace(4)* %gep seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %ptr = getelementptr i32, i32* %in, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %val = load atomic i32, i32* %gep seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}atomic_load_i32_addr64:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(4)* %in, i32 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_load_i32_addr64(i32* %in, i32* %out, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %in, i64 %index
- %val = load atomic i32, i32 addrspace(4)* %ptr seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %ptr = getelementptr i32, i32* %in, i64 %index
+ %val = load atomic i32, i32* %ptr seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}atomic_store_i32_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32* %out) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- store atomic i32 %in, i32 addrspace(4)* %gep seq_cst, align 4
+ %gep = getelementptr i32, i32* %out, i32 4
+ store atomic i32 %in, i32* %gep seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}atomic_store_i32:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_store_i32(i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4
+ store atomic i32 %in, i32* %out seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
-define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32* %out, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- store atomic i32 %in, i32 addrspace(4)* %gep seq_cst, align 4
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ store atomic i32 %in, i32* %gep seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}atomic_store_i32_addr64:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32* %out, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
- store atomic i32 %in, i32 addrspace(4)* %ptr seq_cst, align 4
+ %ptr = getelementptr i32, i32* %out, i64 %index
+ store atomic i32 %in, i32* %ptr seq_cst, align 4
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/flat_atomics_i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat_atomics_i64.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat_atomics_i64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat_atomics_i64.ll Fri Feb 2 08:07:16 2018
@@ -3,973 +3,973 @@
; GCN-LABEL: {{^}}atomic_add_i64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
-define amdgpu_kernel void @atomic_add_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_add_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_add_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
-define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_add_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_add_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_ret:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_add_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_addr64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_add_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile add i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_and_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_and_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_and_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_and_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_and_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_ret:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_and_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_addr64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_and_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile and i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_sub_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_sub_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_sub_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_sub_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_ret:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_sub_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_sub_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_max_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_max_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_max_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_max_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_max_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_ret:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_max_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_addr64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_max_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile max i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umax_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_umax_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umax_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_umax_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_ret:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_umax_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umax_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_min_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_min_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_min_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_min_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_min_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_ret:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_min_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_addr64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_min_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile min i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umin_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_umin_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umin_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_umin_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_ret:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_umin_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_addr64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_umin_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_or_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_or_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_or_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_or_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_or_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_ret:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_or_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_addr64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_or_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile or i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xchg_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_xchg_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xchg_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_xchg_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_ret:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_xchg_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_addr64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xchg_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_offset:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xor_i64_offset(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_xor_i64_offset(i64* %out, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %gep = getelementptr i64, i64* %out, i64 4
+ %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xor_i64(i64 addrspace(4)* %out, i64 %in) {
+define amdgpu_kernel void @atomic_xor_i64(i64* %out, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %out, i64 %in seq_cst
+ %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_ret:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) {
+define amdgpu_kernel void @atomic_xor_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %out, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_addr64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @atomic_xor_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %ptr, i64 %in seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %ptr, i64 %in seq_cst
- store i64 %tmp0, i64 addrspace(4)* %out2
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
+ store i64 %tmp0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_load_i64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i64_offset(i64 addrspace(4)* %in, i64 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_load_i64_offset(i64* %in, i64* %out) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %in, i64 4
- %val = load atomic i64, i64 addrspace(4)* %gep seq_cst, align 8
- store i64 %val, i64 addrspace(4)* %out
+ %gep = getelementptr i64, i64* %in, i64 4
+ %val = load atomic i64, i64* %gep seq_cst, align 8
+ store i64 %val, i64* %out
ret void
}
; GCN-LABEL: {{^}}atomic_load_i64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i64(i64 addrspace(4)* %in, i64 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_load_i64(i64* %in, i64* %out) {
entry:
- %val = load atomic i64, i64 addrspace(4)* %in seq_cst, align 8
- store i64 %val, i64 addrspace(4)* %out
+ %val = load atomic i64, i64* %in seq_cst, align 8
+ store i64 %val, i64* %out
ret void
}
; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64 addrspace(4)* %in, i64 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64* %in, i64* %out, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %in, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %val = load atomic i64, i64 addrspace(4)* %gep seq_cst, align 8
- store i64 %val, i64 addrspace(4)* %out
+ %ptr = getelementptr i64, i64* %in, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %val = load atomic i64, i64* %gep seq_cst, align 8
+ store i64 %val, i64* %out
ret void
}
; GCN-LABEL: {{^}}atomic_load_i64_addr64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @atomic_load_i64_addr64(i64 addrspace(4)* %in, i64 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_load_i64_addr64(i64* %in, i64* %out, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %in, i64 %index
- %val = load atomic i64, i64 addrspace(4)* %ptr seq_cst, align 8
- store i64 %val, i64 addrspace(4)* %out
+ %ptr = getelementptr i64, i64* %in, i64 %index
+ %val = load atomic i64, i64* %ptr seq_cst, align 8
+ store i64 %val, i64* %out
ret void
}
; GCN-LABEL: {{^}}atomic_store_i64_offset:
; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64* %out) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- store atomic i64 %in, i64 addrspace(4)* %gep seq_cst, align 8
+ %gep = getelementptr i64, i64* %out, i64 4
+ store atomic i64 %in, i64* %gep seq_cst, align 8
ret void
}
; GCN-LABEL: {{^}}atomic_store_i64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
-define amdgpu_kernel void @atomic_store_i64(i64 %in, i64 addrspace(4)* %out) {
+define amdgpu_kernel void @atomic_store_i64(i64 %in, i64* %out) {
entry:
- store atomic i64 %in, i64 addrspace(4)* %out seq_cst, align 8
+ store atomic i64 %in, i64* %out seq_cst, align 8
ret void
}
; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64* %out, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- store atomic i64 %in, i64 addrspace(4)* %gep seq_cst, align 8
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ store atomic i64 %in, i64* %gep seq_cst, align 8
ret void
}
; GCN-LABEL: {{^}}atomic_store_i64_addr64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64 addrspace(4)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64* %out, i64 %index) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- store atomic i64 %in, i64 addrspace(4)* %ptr seq_cst, align 8
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ store atomic i64 %in, i64* %ptr seq_cst, align 8
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64 addrspace(4)* %out, i64 %in, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64* %out, i64 %in, i64 %old) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64 addrspace(4)* %out, i64 %in, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64* %out, i64 %in, i64 %old) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 9000
- %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst
+ %gep = getelementptr i64, i64* %out, i64 9000
+ %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
-define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
- %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
- %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst
+ %gep = getelementptr i64, i64* %out, i64 4
+ %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
- store i64 %extract0, i64 addrspace(4)* %out2
+ store i64 %extract0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
-define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4
- %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %gep = getelementptr i64, i64* %ptr, i64 4
+ %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
- store i64 %extract0, i64 addrspace(4)* %out2
+ store i64 %extract0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i64(i64 addrspace(4)* %out, i64 %in, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64(i64* %out, i64 %in, i64 %old) {
entry:
- %val = cmpxchg volatile i64 addrspace(4)* %out, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
-define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
- %val = cmpxchg volatile i64 addrspace(4)* %out, i64 %old, i64 %in seq_cst seq_cst
+ %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
- store i64 %extract0, i64 addrspace(4)* %out2
+ store i64 %extract0, i64* %out2
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
-define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %val = cmpxchg volatile i64 addrspace(4)* %ptr, i64 %old, i64 %in seq_cst seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
-define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index, i64 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
- %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
- %val = cmpxchg volatile i64 addrspace(4)* %ptr, i64 %old, i64 %in seq_cst seq_cst
+ %ptr = getelementptr i64, i64* %out, i64 %index
+ %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
- store i64 %extract0, i64 addrspace(4)* %out2
+ store i64 %extract0, i64* %out2
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll Fri Feb 2 08:07:16 2018
@@ -18,8 +18,8 @@
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @func_mov_fi_i32() #0 {
- %alloca = alloca i32
- store volatile i32* %alloca, i32* addrspace(3)* undef
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 addrspace(5)* %alloca, i32 addrspace(5)* addrspace(3)* undef
ret void
}
@@ -42,9 +42,9 @@ define void @func_mov_fi_i32() #0 {
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @func_add_constant_to_fi_i32() #0 {
- %alloca = alloca [2 x i32], align 4
- %gep0 = getelementptr inbounds [2 x i32], [2 x i32]* %alloca, i32 0, i32 1
- store volatile i32* %gep0, i32* addrspace(3)* undef
+ %alloca = alloca [2 x i32], align 4, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %alloca, i32 0, i32 1
+ store volatile i32 addrspace(5)* %gep0, i32 addrspace(5)* addrspace(3)* undef
ret void
}
@@ -64,8 +64,8 @@ define void @func_add_constant_to_fi_i32
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @func_other_fi_user_i32() #0 {
- %alloca = alloca [2 x i32], align 4
- %ptrtoint = ptrtoint [2 x i32]* %alloca to i32
+ %alloca = alloca [2 x i32], align 4, addrspace(5)
+ %ptrtoint = ptrtoint [2 x i32] addrspace(5)* %alloca to i32
%mul = mul i32 %ptrtoint, 9
store volatile i32 %mul, i32 addrspace(3)* undef
ret void
@@ -74,16 +74,16 @@ define void @func_other_fi_user_i32() #0
; GCN-LABEL: {{^}}func_store_private_arg_i32_ptr:
; GCN: v_mov_b32_e32 v1, 15{{$}}
; GCN: buffer_store_dword v1, v0, s[0:3], s4 offen{{$}}
-define void @func_store_private_arg_i32_ptr(i32* %ptr) #0 {
- store volatile i32 15, i32* %ptr
+define void @func_store_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 {
+ store volatile i32 15, i32 addrspace(5)* %ptr
ret void
}
; GCN-LABEL: {{^}}func_load_private_arg_i32_ptr:
; GCN: s_waitcnt
; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], s4 offen{{$}}
-define void @func_load_private_arg_i32_ptr(i32* %ptr) #0 {
- %val = load volatile i32, i32* %ptr
+define void @func_load_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 {
+ %val = load volatile i32, i32 addrspace(5)* %ptr
ret void
}
@@ -102,11 +102,11 @@ define void @func_load_private_arg_i32_p
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
-define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 }* byval %arg0) #0 {
- %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1
- %load1 = load i32, i32* %gep1
- store volatile i32* %gep1, i32* addrspace(3)* undef
+define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 } addrspace(5)* byval %arg0) #0 {
+ %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
+ %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
+ %load1 = load i32, i32 addrspace(5)* %gep1
+ store volatile i32 addrspace(5)* %gep1, i32 addrspace(5)* addrspace(3)* undef
ret void
}
@@ -115,11 +115,11 @@ define void @void_func_byval_struct_i8_i
; GCN-NEXT: s_mov_b32 s5, s32
; GCN-NEXT: buffer_load_ubyte v0, off, s[0:3], s5
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4
-define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 }* byval %arg0) #0 {
- %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1
- %load0 = load i8, i8* %gep0
- %load1 = load i32, i32* %gep1
+define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* byval %arg0) #0 {
+ %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
+ %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
+ %load0 = load i8, i8 addrspace(5)* %gep0
+ %load1 = load i32, i32 addrspace(5)* %gep1
store volatile i8 %load0, i8 addrspace(3)* undef
store volatile i32 %load1, i32 addrspace(3)* undef
ret void
@@ -146,15 +146,15 @@ define void @void_func_byval_struct_i8_i
; GFX9: buffer_load_dword v1, v{{[0-9]+}}, s[0:3], s4 offen offset:4{{$}}
; GCN: ds_write_b32
-define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 }* byval %arg0, i32 %arg2) #0 {
+define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval %arg0, i32 %arg2) #0 {
%cmp = icmp eq i32 %arg2, 0
br i1 %cmp, label %bb, label %ret
bb:
- %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1
- %load1 = load volatile i32, i32* %gep1
- store volatile i32* %gep1, i32* addrspace(3)* undef
+ %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
+ %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
+ %load1 = load volatile i32, i32 addrspace(5)* %gep1
+ store volatile i32 addrspace(5)* %gep1, i32 addrspace(5)* addrspace(3)* undef
br label %ret
ret:
@@ -175,12 +175,12 @@ ret:
; GCN: v_mul_lo_i32 v0, v0, 9
; GCN: ds_write_b32 v0, v0
define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
- %alloca0 = alloca [128 x i32], align 4
- %alloca1 = alloca [8 x i32], align 4
- %gep0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca0, i32 0, i32 65
- %gep1 = getelementptr inbounds [8 x i32], [8 x i32]* %alloca1, i32 0, i32 0
- store volatile i32 7, i32* %gep0
- %ptrtoint = ptrtoint i32* %gep1 to i32
+ %alloca0 = alloca [128 x i32], align 4, addrspace(5)
+ %alloca1 = alloca [8 x i32], align 4, addrspace(5)
+ %gep0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca0, i32 0, i32 65
+ %gep1 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %alloca1, i32 0, i32 0
+ store volatile i32 7, i32 addrspace(5)* %gep0
+ %ptrtoint = ptrtoint i32 addrspace(5)* %gep1 to i32
%mul = mul i32 %ptrtoint, 9
store volatile i32 %mul, i32 addrspace(3)* undef
ret void
@@ -199,20 +199,20 @@ define void @func_other_fi_user_non_inli
; GCN: v_mul_lo_i32 v0, v0, 9
; GCN: ds_write_b32 v0, v0
define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 {
- %alloca0 = alloca [128 x i32], align 4
- %alloca1 = alloca [8 x i32], align 4
+ %alloca0 = alloca [128 x i32], align 4, addrspace(5)
+ %alloca1 = alloca [8 x i32], align 4, addrspace(5)
%vcc = call i64 asm sideeffect "; def $0", "={VCC}"()
- %gep0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca0, i32 0, i32 65
- %gep1 = getelementptr inbounds [8 x i32], [8 x i32]* %alloca1, i32 0, i32 0
- store volatile i32 7, i32* %gep0
+ %gep0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca0, i32 0, i32 65
+ %gep1 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %alloca1, i32 0, i32 0
+ store volatile i32 7, i32 addrspace(5)* %gep0
call void asm sideeffect "; use $0", "{VCC}"(i64 %vcc)
- %ptrtoint = ptrtoint i32* %gep1 to i32
+ %ptrtoint = ptrtoint i32 addrspace(5)* %gep1 to i32
%mul = mul i32 %ptrtoint, 9
store volatile i32 %mul, i32 addrspace(3)* undef
ret void
}
-declare void @func(<4 x float>* nocapture) #0
+declare void @func(<4 x float> addrspace(5)* nocapture) #0
; undef flag not preserved in eliminateFrameIndex when handling the
; stores in the middle block.
@@ -225,16 +225,16 @@ declare void @func(<4 x float>* nocaptur
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:
define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 {
bb:
- %tmp = alloca <4 x float>, align 16
+ %tmp = alloca <4 x float>, align 16, addrspace(5)
%tmp2 = insertelement <4 x float> undef, float %arg, i32 0
- store <4 x float> %tmp2, <4 x float>* undef
+ store <4 x float> %tmp2, <4 x float> addrspace(5)* undef
%tmp3 = icmp eq i32 %arg1, 0
br i1 %tmp3, label %bb4, label %bb5
bb4:
- call void @func(<4 x float>* nonnull undef)
- store <4 x float> %tmp2, <4 x float>* %tmp, align 16
- call void @func(<4 x float>* nonnull %tmp)
+ call void @func(<4 x float> addrspace(5)* nonnull undef)
+ store <4 x float> %tmp2, <4 x float> addrspace(5)* %tmp, align 16
+ call void @func(<4 x float> addrspace(5)* nonnull %tmp)
br label %bb5
bb5:
@@ -245,15 +245,15 @@ bb5:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s5 offset:12
define void @alloca_ptr_nonentry_block(i32 %arg0) #0 {
- %alloca0 = alloca { i8, i32 }, align 4
+ %alloca0 = alloca { i8, i32 }, align 4, addrspace(5)
%cmp = icmp eq i32 %arg0, 0
br i1 %cmp, label %bb, label %ret
bb:
- %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %alloca0, i32 0, i32 0
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %alloca0, i32 0, i32 1
- %load1 = load volatile i32, i32* %gep1
- store volatile i32* %gep1, i32* addrspace(3)* undef
+ %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %alloca0, i32 0, i32 0
+ %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %alloca0, i32 0, i32 1
+ %load1 = load volatile i32, i32 addrspace(5)* %gep1
+ store volatile i32 addrspace(5)* %gep1, i32 addrspace(5)* addrspace(3)* undef
br label %ret
ret:
Modified: llvm/trunk/test/CodeGen/AMDGPU/function-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/function-args.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/function-args.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/function-args.ll Fri Feb 2 08:07:16 2018
@@ -506,8 +506,8 @@ define void @void_func_struct_i8_i32({ i
; GCN-DAG: buffer_load_dword v[[ELT1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
; GCN-DAG: buffer_store_dword v[[ELT1]]
; GCN-DAG: buffer_store_byte v[[ELT0]]
-define void @void_func_byval_struct_i8_i32({ i8, i32 }* byval %arg0) #0 {
- %arg0.load = load { i8, i32 }, { i8, i32 }* %arg0
+define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval %arg0) #0 {
+ %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
ret void
}
@@ -520,9 +520,9 @@ define void @void_func_byval_struct_i8_i
; GCN: ds_write_b32 v0, v0
; GCN: s_setpc_b64
-define void @void_func_byval_struct_i8_i32_x2({ i8, i32 }* byval %arg0, { i8, i32 }* byval %arg1, i32 %arg2) #0 {
- %arg0.load = load volatile { i8, i32 }, { i8, i32 }* %arg0
- %arg1.load = load volatile { i8, i32 }, { i8, i32 }* %arg1
+define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval %arg0, { i8, i32 } addrspace(5)* byval %arg1, i32 %arg2) #0 {
+ %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
+ %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1
store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
store volatile { i8, i32 } %arg1.load, { i8, i32 } addrspace(1)* undef
store volatile i32 %arg2, i32 addrspace(3)* undef
@@ -535,9 +535,9 @@ define void @void_func_byval_struct_i8_i
; GCN-DAG: buffer_load_dword v[[ARG1_LOAD1:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
; GCN-DAG: buffer_store_dword v[[ARG0_LOAD]], off
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ARG1_LOAD0]]:[[ARG1_LOAD1]]{{\]}}, off
-define void @void_func_byval_i32_byval_i64(i32* byval %arg0, i64* byval %arg1) #0 {
- %arg0.load = load i32, i32* %arg0
- %arg1.load = load i64, i64* %arg1
+define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval %arg0, i64 addrspace(5)* byval %arg1) #0 {
+ %arg0.load = load i32, i32 addrspace(5)* %arg0
+ %arg1.load = load i64, i64 addrspace(5)* %arg1
store i32 %arg0.load, i32 addrspace(1)* undef
store i64 %arg1.load, i64 addrspace(1)* undef
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll Fri Feb 2 08:07:16 2018
@@ -276,7 +276,7 @@ define amdgpu_kernel void @test_double16
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -443,7 +443,7 @@ define amdgpu_kernel void @test_queue(%o
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_struct(%struct.A* byval %a)
+define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a)
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20
!kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
ret void
@@ -539,7 +539,7 @@ define amdgpu_kernel void @test_multi_ar
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: g
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -547,7 +547,7 @@ define amdgpu_kernel void @test_multi_ar
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: c
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -555,7 +555,7 @@ define amdgpu_kernel void @test_multi_ar
; CHECK-NEXT: AddrSpaceQual: Constant
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: l
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -594,7 +594,7 @@ define amdgpu_kernel void @test_addr_spa
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -603,7 +603,7 @@ define amdgpu_kernel void @test_addr_spa
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: IsVolatile: true
; CHECK-NEXT: - Name: b
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -613,7 +613,7 @@ define amdgpu_kernel void @test_addr_spa
; CHECK-NEXT: IsConst: true
; CHECK-NEXT: IsRestrict: true
; CHECK-NEXT: - Name: c
-; CHECK-NEXT: TypeName: 'int *'
+; CHECK-NEXT: TypeName: 'int addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: Pipe
@@ -1043,7 +1043,7 @@ define amdgpu_kernel void @test_wgs_hint
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: 'int **'
+; CHECK-NEXT: TypeName: 'int addrspace(5)* addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -1067,7 +1067,7 @@ define amdgpu_kernel void @test_wgs_hint
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_arg_ptr_to_ptr(i32* addrspace(1)* %a)
+define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a)
!kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80
!kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
ret void
@@ -1103,7 +1103,7 @@ define amdgpu_kernel void @test_arg_ptr_
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
-define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B* byval %a)
+define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a)
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82
!kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
ret void
@@ -1115,7 +1115,7 @@ define amdgpu_kernel void @test_arg_stru
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: 'global int* __attribute__((ext_vector_type(2)))'
+; CHECK-NEXT: TypeName: 'global int addrspace(5)* __attribute__((ext_vector_type(2)))'
; CHECK-NEXT: Size: 16
; CHECK-NEXT: Align: 16
; CHECK-NEXT: ValueKind: ByValue
@@ -1187,7 +1187,7 @@ define amdgpu_kernel void @test_arg_unkn
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: 'long *'
+; CHECK-NEXT: TypeName: 'long addrspace(5)*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
@@ -1195,7 +1195,7 @@ define amdgpu_kernel void @test_arg_unkn
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: b
-; CHECK-NEXT: TypeName: 'char *'
+; CHECK-NEXT: TypeName: 'char addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1204,7 +1204,7 @@ define amdgpu_kernel void @test_arg_unkn
; CHECK-NEXT: AddrSpaceQual: Local
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: c
-; CHECK-NEXT: TypeName: 'char2 *'
+; CHECK-NEXT: TypeName: 'char2 addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1213,7 +1213,7 @@ define amdgpu_kernel void @test_arg_unkn
; CHECK-NEXT: AddrSpaceQual: Local
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: d
-; CHECK-NEXT: TypeName: 'char3 *'
+; CHECK-NEXT: TypeName: 'char3 addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1222,7 +1222,7 @@ define amdgpu_kernel void @test_arg_unkn
; CHECK-NEXT: AddrSpaceQual: Local
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: e
-; CHECK-NEXT: TypeName: 'char4 *'
+; CHECK-NEXT: TypeName: 'char4 addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1231,7 +1231,7 @@ define amdgpu_kernel void @test_arg_unkn
; CHECK-NEXT: AddrSpaceQual: Local
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: f
-; CHECK-NEXT: TypeName: 'char8 *'
+; CHECK-NEXT: TypeName: 'char8 addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1240,7 +1240,7 @@ define amdgpu_kernel void @test_arg_unkn
; CHECK-NEXT: AddrSpaceQual: Local
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Name: g
-; CHECK-NEXT: TypeName: 'char16 *'
+; CHECK-NEXT: TypeName: 'char16 addrspace(5)*'
; CHECK-NEXT: Size: 4
; CHECK-NEXT: Align: 4
; CHECK-NEXT: ValueKind: DynamicSharedPointer
@@ -1309,7 +1309,7 @@ define amdgpu_kernel void @test_pointee_
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
define amdgpu_kernel void @__test_block_invoke_kernel(
- <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %arg) #0
+ <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110
!kernel_arg_base_type !110 !kernel_arg_type_qual !4 {
ret void
@@ -1380,7 +1380,7 @@ attributes #1 = { "calls-enqueue-kernel"
!13 = !{!"half8"}
!14 = !{!"float16"}
!15 = !{!"double16"}
-!16 = !{!"int *"}
+!16 = !{!"int addrspace(5)*"}
!17 = !{!"image2d_t"}
!18 = !{!"sampler_t"}
!19 = !{!"queue_t"}
@@ -1396,23 +1396,23 @@ attributes #1 = { "calls-enqueue-kernel"
!29 = !{i8 undef, i32 1}
!30 = !{i16 undef, i32 1}
!31 = !{i64 undef, i32 1}
-!32 = !{i32 *undef, i32 1}
+!32 = !{i32 addrspace(5)*undef, i32 1}
!50 = !{i32 1, i32 2, i32 3}
-!51 = !{!"int *", !"int *", !"int *"}
+!51 = !{!"int addrspace(5)*", !"int addrspace(5)*", !"int addrspace(5)*"}
!60 = !{i32 1, i32 1, i32 1}
!61 = !{!"read_only", !"write_only", !"read_write"}
!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
!70 = !{!"volatile", !"const restrict", !"pipe"}
-!80 = !{!"int **"}
+!80 = !{!"int addrspace(5)* addrspace(5)*"}
!81 = !{i32 1}
!82 = !{!"struct B"}
-!83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
+!83 = !{!"global int addrspace(5)* __attribute__((ext_vector_type(2)))"}
!84 = !{!"clk_event_t"}
!opencl.ocl.version = !{!90}
!90 = !{i32 2, i32 0}
!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3}
!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
-!93 = !{!"long *", !"char *", !"char2 *", !"char3 *", !"char4 *", !"char8 *", !"char16 *"}
+!93 = !{!"long addrspace(5)*", !"char addrspace(5)*", !"char2 addrspace(5)*", !"char3 addrspace(5)*", !"char4 addrspace(5)*", !"char8 addrspace(5)*", !"char16 addrspace(5)*"}
!94 = !{!"", !"", !"", !"", !"", !"", !""}
!100 = !{!"1:1:4:%d\5Cn"}
!101 = !{!"2:1:8:%g\5Cn"}
Modified: llvm/trunk/test/CodeGen/AMDGPU/huge-private-buffer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/huge-private-buffer.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/huge-private-buffer.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/huge-private-buffer.ll Fri Feb 2 08:07:16 2018
@@ -5,9 +5,9 @@
; GCN-NOT: [[FI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
define amdgpu_kernel void @scratch_buffer_known_high_bit_small() #0 {
- %alloca = alloca i32, align 4
- store volatile i32 0, i32* %alloca
- %toint = ptrtoint i32* %alloca to i32
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
+ %toint = ptrtoint i32 addrspace(5)* %alloca to i32
%masked = and i32 %toint, 2147483647
store volatile i32 %masked, i32 addrspace(1)* undef
ret void
@@ -19,9 +19,9 @@ define amdgpu_kernel void @scratch_buffe
; GCN-DAG: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x7ffffffc, [[FI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
define amdgpu_kernel void @scratch_buffer_known_high_bit_huge() #1 {
- %alloca = alloca i32, align 4
- store volatile i32 0, i32* %alloca
- %toint = ptrtoint i32* %alloca to i32
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
+ %toint = ptrtoint i32 addrspace(5)* %alloca to i32
%masked = and i32 %toint, 2147483647
store volatile i32 %masked, i32 addrspace(1)* undef
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll Fri Feb 2 08:07:16 2018
@@ -22,11 +22,11 @@ declare void @llvm.amdgcn.s.barrier() #0
; CI-PROMOTE: ds_read_b64
define amdgpu_kernel void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) #1 {
%val = load double, double addrspace(1)* %in, align 8
- %array = alloca [8 x double], align 8
- %ptr = getelementptr inbounds [8 x double], [8 x double]* %array, i32 0, i32 %b
- store double %val, double* %ptr, align 8
+ %array = alloca [8 x double], align 8, addrspace(5)
+ %ptr = getelementptr inbounds [8 x double], [8 x double] addrspace(5)* %array, i32 0, i32 %b
+ store double %val, double addrspace(5)* %ptr, align 8
call void @llvm.amdgcn.s.barrier()
- %result = load double, double* %ptr, align 8
+ %result = load double, double addrspace(5)* %ptr, align 8
store double %result, double addrspace(1)* %out, align 8
ret void
}
@@ -53,11 +53,11 @@ define amdgpu_kernel void @private_acces
; CI-PROMOTE: ds_read2_b64
define amdgpu_kernel void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) #1 {
%val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
- %array = alloca [4 x <2 x double>], align 16
- %ptr = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* %array, i32 0, i32 %b
- store <2 x double> %val, <2 x double>* %ptr, align 16
+ %array = alloca [4 x <2 x double>], align 16, addrspace(5)
+ %ptr = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>] addrspace(5)* %array, i32 0, i32 %b
+ store <2 x double> %val, <2 x double> addrspace(5)* %ptr, align 16
call void @llvm.amdgcn.s.barrier()
- %result = load <2 x double>, <2 x double>* %ptr, align 16
+ %result = load <2 x double>, <2 x double> addrspace(5)* %ptr, align 16
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
ret void
}
@@ -79,11 +79,11 @@ define amdgpu_kernel void @private_acces
; CI-PROMOTE: ds_read_b64
define amdgpu_kernel void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) #1 {
%val = load i64, i64 addrspace(1)* %in, align 8
- %array = alloca [8 x i64], align 8
- %ptr = getelementptr inbounds [8 x i64], [8 x i64]* %array, i32 0, i32 %b
- store i64 %val, i64* %ptr, align 8
+ %array = alloca [8 x i64], align 8, addrspace(5)
+ %ptr = getelementptr inbounds [8 x i64], [8 x i64] addrspace(5)* %array, i32 0, i32 %b
+ store i64 %val, i64 addrspace(5)* %ptr, align 8
call void @llvm.amdgcn.s.barrier()
- %result = load i64, i64* %ptr, align 8
+ %result = load i64, i64 addrspace(5)* %ptr, align 8
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
@@ -111,11 +111,11 @@ define amdgpu_kernel void @private_acces
; CI-PROMOTE: ds_read2_b64
define amdgpu_kernel void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) #1 {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
- %array = alloca [4 x <2 x i64>], align 16
- %ptr = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %array, i32 0, i32 %b
- store <2 x i64> %val, <2 x i64>* %ptr, align 16
+ %array = alloca [4 x <2 x i64>], align 16, addrspace(5)
+ %ptr = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>] addrspace(5)* %array, i32 0, i32 %b
+ store <2 x i64> %val, <2 x i64> addrspace(5)* %ptr, align 16
call void @llvm.amdgcn.s.barrier()
- %result = load <2 x i64>, <2 x i64>* %ptr, align 16
+ %result = load <2 x i64>, <2 x i64> addrspace(5)* %ptr, align 16
store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_subreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_subreg.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert_subreg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert_subreg.ll Fri Feb 2 08:07:16 2018
@@ -8,8 +8,8 @@
; CHECK-LABEL: test:
define amdgpu_kernel void @test(i64 addrspace(1)* %out) {
entry:
- %tmp0 = alloca [16 x i32]
- %tmp1 = ptrtoint [16 x i32]* %tmp0 to i32
+ %tmp0 = alloca [16 x i32], addrspace(5)
+ %tmp1 = ptrtoint [16 x i32] addrspace(5)* %tmp0 to i32
%tmp2 = sext i32 %tmp1 to i64
store i64 %tmp2, i64 addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir Fri Feb 2 08:07:16 2018
@@ -17,9 +17,9 @@
define amdgpu_kernel void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) {
entry:
- %A.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !5, metadata !11), !dbg !12
+ %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !5, metadata !11), !dbg !12
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll Fri Feb 2 08:07:16 2018
@@ -6,39 +6,39 @@
; CHECK-LABEL: {{^}}no_args:
; CHECK: ScratchSize: 5{{$}}
define amdgpu_kernel void @no_args() {
- %alloca = alloca i8
- store volatile i8 0, i8* %alloca
+ %alloca = alloca i8, addrspace(5)
+ store volatile i8 0, i8 addrspace(5)* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align32:
; CHECK: ScratchSize: 5{{$}}
define amdgpu_kernel void @force_align32(<8 x i32>) {
- %alloca = alloca i8
- store volatile i8 0, i8* %alloca
+ %alloca = alloca i8, addrspace(5)
+ store volatile i8 0, i8 addrspace(5)* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align64:
; CHECK: ScratchSize: 5{{$}}
define amdgpu_kernel void @force_align64(<16 x i32>) {
- %alloca = alloca i8
- store volatile i8 0, i8* %alloca
+ %alloca = alloca i8, addrspace(5)
+ store volatile i8 0, i8 addrspace(5)* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align128:
; CHECK: ScratchSize: 5{{$}}
define amdgpu_kernel void @force_align128(<32 x i32>) {
- %alloca = alloca i8
- store volatile i8 0, i8* %alloca
+ %alloca = alloca i8, addrspace(5)
+ store volatile i8 0, i8 addrspace(5)* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align256:
; CHECK: ScratchSize: 5{{$}}
define amdgpu_kernel void @force_align256(<64 x i32>) {
- %alloca = alloca i8
- store volatile i8 0, i8* %alloca
+ %alloca = alloca i8, addrspace(5)
+ store volatile i8 0, i8 addrspace(5)* %alloca
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll Fri Feb 2 08:07:16 2018
@@ -46,14 +46,14 @@
; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen
; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen
; Scratch size = alloca size + emergency stack slot
; ALL: ; ScratchSize: 32772
define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 {
- %large = alloca [8192 x i32], align 4
- %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
- store volatile i32 %x, i32* %gep
- %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
- %val = load volatile i32, i32* %gep1
+ %large = alloca [8192 x i32], align 4, addrspace(5)
+ %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191
+ store volatile i32 %x, i32 addrspace(5)* %gep
+ %gep1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y
+ %val = load volatile i32, i32 addrspace(5)* %gep1
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll Fri Feb 2 08:07:16 2018
@@ -15,11 +15,11 @@
; ALL: ; ScratchSize: 32772
define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
- %large = alloca [8192 x i32], align 4
- %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
- store volatile i32 %x, i32* %gep
- %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
- %val = load volatile i32, i32* %gep1
+ %large = alloca [8192 x i32], align 4, addrspace(5)
+ %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191
+ store volatile i32 %x, i32 addrspace(5)* %gep
+ %gep1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y
+ %val = load volatile i32, i32 addrspace(5)* %gep1
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
@@ -37,11 +37,11 @@ define amdgpu_ps void @large_alloca_pixe
; ALL: ; ScratchSize: 32772
define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #0 {
- %large = alloca [8192 x i32], align 4
- %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
- store volatile i32 %x, i32* %gep
- %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
- %val = load volatile i32, i32* %gep1
+ %large = alloca [8192 x i32], align 4, addrspace(5)
+ %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191
+ store volatile i32 %x, i32 addrspace(5)* %gep
+ %gep1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y
+ %val = load volatile i32, i32 addrspace(5)* %gep1
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll Fri Feb 2 08:07:16 2018
@@ -4,11 +4,11 @@
declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
declare i32 @llvm.amdgcn.workitem.id.x() #1
@@ -159,9 +159,9 @@ define amdgpu_kernel void @global_atomic
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 {
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out
ret void
}
@@ -169,18 +169,18 @@ define amdgpu_kernel void @flat_atomic_d
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) #0 {
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out
ret void
}
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32 addrspace(4)* %ptr) nounwind {
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind {
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -188,9 +188,9 @@ define amdgpu_kernel void @flat_atomic_d
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind {
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -198,13 +198,13 @@ define amdgpu_kernel void @flat_atomic_d
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 %id
- %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out.gep
+ %gep.tid = getelementptr i32, i32* %ptr, i32 %id
+ %out.gep = getelementptr i32, i32* %out, i32 %id
+ %gep = getelementptr i32, i32* %gep.tid, i32 5
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out.gep
ret void
}
@@ -212,11 +212,11 @@ define amdgpu_kernel void @flat_atomic_d
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
- %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
- %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
+ %gep.tid = getelementptr i32, i32* %ptr, i32 %id
+ %gep = getelementptr i32, i32* %gep.tid, i32 5
+ %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -224,9 +224,9 @@ define amdgpu_kernel void @flat_atomic_d
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 {
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out
ret void
}
@@ -235,10 +235,10 @@ define amdgpu_kernel void @flat_atomic_d
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) #0 {
+ %gep = getelementptr i64, i64* %ptr, i32 4
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out
ret void
}
@@ -246,8 +246,8 @@ define amdgpu_kernel void @flat_atomic_d
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64 addrspace(4)* %ptr) nounwind {
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
ret void
}
@@ -256,9 +256,9 @@ define amdgpu_kernel void @flat_atomic_d
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64 addrspace(4)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind {
+ %gep = getelementptr i64, i64* %ptr, i32 4
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
ret void
}
@@ -267,13 +267,13 @@ define amdgpu_kernel void @flat_atomic_d
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
- %out.gep = getelementptr i64, i64 addrspace(4)* %out, i32 %id
- %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out.gep
+ %gep.tid = getelementptr i64, i64* %ptr, i32 %id
+ %out.gep = getelementptr i64, i64* %out, i32 %id
+ %gep = getelementptr i64, i64* %gep.tid, i32 5
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out.gep
ret void
}
@@ -282,11 +282,11 @@ define amdgpu_kernel void @flat_atomic_d
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
- %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
- %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
+ %gep.tid = getelementptr i64, i64* %ptr, i32 %id
+ %gep = getelementptr i64, i64* %gep.tid, i32 5
+ %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll Fri Feb 2 08:07:16 2018
@@ -4,11 +4,11 @@
declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
declare i32 @llvm.amdgcn.workitem.id.x() #1
@@ -261,9 +261,9 @@ define amdgpu_kernel void @global_atomic
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 {
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out
ret void
}
@@ -271,18 +271,18 @@ define amdgpu_kernel void @flat_atomic_i
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 {
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out
ret void
}
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32 addrspace(4)* %ptr) nounwind {
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind {
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -290,9 +290,9 @@ define amdgpu_kernel void @flat_atomic_i
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind {
+ %gep = getelementptr i32, i32* %ptr, i32 4
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -300,13 +300,13 @@ define amdgpu_kernel void @flat_atomic_i
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 %id
- %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
- store i32 %result, i32 addrspace(4)* %out.gep
+ %gep.tid = getelementptr i32, i32* %ptr, i32 %id
+ %out.gep = getelementptr i32, i32* %out, i32 %id
+ %gep = getelementptr i32, i32* %gep.tid, i32 5
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
+ store i32 %result, i32* %out.gep
ret void
}
@@ -314,11 +314,11 @@ define amdgpu_kernel void @flat_atomic_i
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
- %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
- %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
+ %gep.tid = getelementptr i32, i32* %ptr, i32 %id
+ %gep = getelementptr i32, i32* %gep.tid, i32 5
+ %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
ret void
}
@@ -341,9 +341,9 @@ define amdgpu_kernel void @atomic_inc_sh
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out
ret void
}
@@ -352,10 +352,10 @@ define amdgpu_kernel void @flat_atomic_i
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out
+define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
+ %gep = getelementptr i64, i64* %ptr, i32 4
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out
ret void
}
@@ -363,8 +363,8 @@ define amdgpu_kernel void @flat_atomic_i
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64 addrspace(4)* %ptr) nounwind {
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
ret void
}
@@ -373,9 +373,9 @@ define amdgpu_kernel void @flat_atomic_i
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64 addrspace(4)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind {
+ %gep = getelementptr i64, i64* %ptr, i32 4
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
ret void
}
@@ -384,13 +384,13 @@ define amdgpu_kernel void @flat_atomic_i
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
- %out.gep = getelementptr i64, i64 addrspace(4)* %out, i32 %id
- %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
- store i64 %result, i64 addrspace(4)* %out.gep
+ %gep.tid = getelementptr i64, i64* %ptr, i32 %id
+ %out.gep = getelementptr i64, i64* %out, i32 %id
+ %gep = getelementptr i64, i64* %gep.tid, i32 5
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
+ store i64 %result, i64* %out.gep
ret void
}
@@ -399,11 +399,11 @@ define amdgpu_kernel void @flat_atomic_i
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
- %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
- %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
+ %gep.tid = getelementptr i64, i64* %ptr, i32 %id
+ %gep = getelementptr i64, i64* %gep.tid, i32 5
+ %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll Fri Feb 2 08:07:16 2018
@@ -8,8 +8,8 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: ; return
define amdgpu_ps i32 @test_ps() #1 {
- %alloca = alloca i32
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
%buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
%value = load volatile i32, i32 addrspace(2)* %buffer_ptr
@@ -21,8 +21,8 @@ define amdgpu_ps i32 @test_ps() #1 {
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s2 offset:4
; GCN: s_load_dword s0, s[0:1], 0x0
define amdgpu_cs i32 @test_cs() #1 {
- %alloca = alloca i32
- store volatile i32 0, i32* %alloca
+ %alloca = alloca i32, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
%buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
%value = load volatile i32, i32 addrspace(2)* %buffer_ptr
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll Fri Feb 2 08:07:16 2018
@@ -221,9 +221,9 @@ entry:
; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_sdwa
-define void @load_flat_hi_v2i16_reglo_vreg(i16 addrspace(4)* %in, i16 %reg) #0 {
+define void @load_flat_hi_v2i16_reglo_vreg(i16* %in, i16 %reg) #0 {
entry:
- %load = load i16, i16 addrspace(4)* %in
+ %load = load i16, i16* %in
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -241,9 +241,9 @@ entry:
; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_sdwa
-define void @load_flat_hi_v2f16_reglo_vreg(half addrspace(4)* %in, half %reg) #0 {
+define void @load_flat_hi_v2f16_reglo_vreg(half* %in, half %reg) #0 {
entry:
- %load = load half, half addrspace(4)* %in
+ %load = load half, half* %in
%build0 = insertelement <2 x half> undef, half %reg, i32 0
%build1 = insertelement <2 x half> %build0, half %load, i32 1
store <2 x half> %build1, <2 x half> addrspace(1)* undef
@@ -261,9 +261,9 @@ entry:
; VI: flat_load_ubyte v{{[0-9]+}}
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_sdwa
-define void @load_flat_hi_v2i16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i16 %reg) #0 {
+define void @load_flat_hi_v2i16_reglo_vreg_zexti8(i8* %in, i16 %reg) #0 {
entry:
- %load = load i8, i8 addrspace(4)* %in
+ %load = load i8, i8* %in
%ext = zext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -282,9 +282,9 @@ entry:
; VI: flat_load_sbyte v{{[0-9]+}}
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_sdwa
-define void @load_flat_hi_v2i16_reglo_vreg_sexti8(i8 addrspace(4)* %in, i16 %reg) #0 {
+define void @load_flat_hi_v2i16_reglo_vreg_sexti8(i8* %in, i16 %reg) #0 {
entry:
- %load = load i8, i8 addrspace(4)* %in
+ %load = load i8, i8* %in
%ext = sext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -301,10 +301,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
-define void @load_private_hi_v2i16_reglo_vreg(i16* byval %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg(i16 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
- %gep = getelementptr inbounds i16, i16* %in, i64 2045
- %load = load i16, i16* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
+ %load = load i16, i16 addrspace(5)* %gep
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -320,10 +320,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
-define void @load_private_hi_v2f16_reglo_vreg(half* byval %in, half %reg) #0 {
+define void @load_private_hi_v2f16_reglo_vreg(half addrspace(5)* byval %in, half %reg) #0 {
entry:
- %gep = getelementptr inbounds half, half* %in, i64 2045
- %load = load half, half* %gep
+ %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2045
+ %load = load half, half addrspace(5)* %gep
%build0 = insertelement <2 x half> undef, half %reg, i32 0
%build1 = insertelement <2 x half> %build0, half %load, i32 1
store <2 x half> %build1, <2 x half> addrspace(1)* undef
@@ -339,9 +339,9 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_hi_v2i16_reglo_vreg_nooff(i16* byval %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_nooff(i16 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
- %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -357,9 +357,9 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_hi_v2f16_reglo_vreg_nooff(half* %in, half %reg) #0 {
+define void @load_private_hi_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, half %reg) #0 {
entry:
- %load = load volatile half, half* inttoptr (i32 4094 to half*)
+ %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*)
%build0 = insertelement <2 x half> undef, half %reg, i32 0
%build1 = insertelement <2 x half> %build0, half %load, i32 1
store <2 x half> %build1, <2 x half> addrspace(1)* undef
@@ -375,10 +375,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
-define void @load_private_hi_v2i16_reglo_vreg_zexti8(i8* byval %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
- %gep = getelementptr inbounds i8, i8* %in, i64 4091
- %load = load i8, i8* %gep
+ %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
+ %load = load i8, i8 addrspace(5)* %gep
%ext = zext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -395,10 +395,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
-define void @load_private_hi_v2i16_reglo_vreg_sexti8(i8* byval %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
- %gep = getelementptr inbounds i8, i8* %in, i64 4091
- %load = load i8, i8* %gep
+ %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
+ %load = load i8, i8 addrspace(5)* %gep
%ext = sext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -415,9 +415,9 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_hi_v2i16_reglo_vreg_nooff_zexti8(i8* %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i16 %reg) #0 {
entry:
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = zext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -434,9 +434,9 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_hi_v2i16_reglo_vreg_nooff_sexti8(i8* %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i16 %reg) #0 {
entry:
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = sext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -453,9 +453,9 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_hi_v2f16_reglo_vreg_nooff_zexti8(i8* %in, half %reg) #0 {
+define void @load_private_hi_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, half %reg) #0 {
entry:
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = zext i8 %load to i16
%bc.ext = bitcast i16 %ext to half
%build0 = insertelement <2 x half> undef, half %reg, i32 0
@@ -510,12 +510,12 @@ entry:
; GFX9-NEXT: buffer_load_short_d16_hi v{{[0-9]+}}, off, s[0:3], s5 offset:4094
define void @load_private_hi_v2i16_reglo_vreg_to_offset(i16 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i16], align 2
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i16], [4096 x i16]* %obj1, i32 0, i32 2025
- %load = load i16, i16* %gep
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i16], align 2, addrspace(5)
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2025
+ %load = load i16, i16 addrspace(5)* %gep
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -527,12 +527,12 @@ entry:
; GFX9-NEXT: buffer_load_sbyte_d16_hi v{{[0-9]+}}, off, s[0:3], s5 offset:4095
define void @load_private_hi_v2i16_reglo_vreg_sexti8_to_offset(i16 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i8], align 2
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
- %load = load i8, i8* %gep
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i8], align 2, addrspace(5)
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
+ %load = load i8, i8 addrspace(5)* %gep
%ext = sext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -545,12 +545,12 @@ entry:
; GFX9-NEXT: buffer_load_ubyte_d16_hi v{{[0-9]+}}, off, s[0:3], s5 offset:4095
define void @load_private_hi_v2i16_reglo_vreg_zexti8_to_offset(i16 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i8], align 2
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
- %load = load i8, i8* %gep
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i8], align 2, addrspace(5)
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
+ %load = load i8, i8 addrspace(5)* %gep
%ext = zext i8 %load to i16
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1
@@ -606,11 +606,11 @@ entry:
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_setpc_b64
-define <2 x i16> @load_flat_v2i16_split(i16 addrspace(4)* %in) #0 {
+define <2 x i16> @load_flat_v2i16_split(i16* %in) #0 {
entry:
- %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 1
- %load0 = load volatile i16, i16 addrspace(4)* %in
- %load1 = load volatile i16, i16 addrspace(4)* %gep
+ %gep = getelementptr inbounds i16, i16* %in, i64 1
+ %load0 = load volatile i16, i16* %in
+ %load1 = load volatile i16, i16* %gep
%build0 = insertelement <2 x i16> undef, i16 %load0, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load1, i32 1
ret <2 x i16> %build1
@@ -644,11 +644,11 @@ entry:
; GFX9-NEXT: buffer_load_short_d16_hi v0, off, s[0:3], s5 offset:6
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
-define <2 x i16> @load_private_v2i16_split(i16* byval %in) #0 {
+define <2 x i16> @load_private_v2i16_split(i16 addrspace(5)* byval %in) #0 {
entry:
- %gep = getelementptr inbounds i16, i16* %in, i32 1
- %load0 = load volatile i16, i16* %in
- %load1 = load volatile i16, i16* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i32 1
+ %load0 = load volatile i16, i16 addrspace(5)* %in
+ %load1 = load volatile i16, i16 addrspace(5)* %gep
%build0 = insertelement <2 x i16> undef, i16 %load0, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load1, i32 1
ret <2 x i16> %build1
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll Fri Feb 2 08:07:16 2018
@@ -269,10 +269,10 @@ entry:
; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
-define void @load_flat_lo_v2i16_reghi_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
+define void @load_flat_lo_v2i16_reghi_vreg(i16* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load i16, i16 addrspace(4)* %in
+ %load = load i16, i16* %in
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@@ -288,10 +288,10 @@ entry:
; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
-define void @load_flat_lo_v2f16_reghi_vreg(half addrspace(4)* %in, i32 %reg) #0 {
+define void @load_flat_lo_v2f16_reghi_vreg(half* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
- %load = load half, half addrspace(4)* %in
+ %load = load half, half* %in
%build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
store <2 x half> %build1, <2 x half> addrspace(1)* undef
ret void
@@ -307,10 +307,10 @@ entry:
; VI: flat_load_ubyte v{{[0-9]+}}
; VI: v_or_b32_e32
-define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
+define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load i8, i8 addrspace(4)* %in
+ %load = load i8, i8* %in
%ext = zext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -328,10 +328,10 @@ entry:
; VI: flat_load_sbyte v{{[0-9]+}}
; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
+define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load i8, i8 addrspace(4)* %in
+ %load = load i8, i8* %in
%ext = sext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -347,11 +347,11 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
-define void @load_private_lo_v2i16_reglo_vreg(i16* byval %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg(i16 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %gep = getelementptr inbounds i16, i16* %in, i64 2045
- %load = load i16, i16* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
+ %load = load i16, i16 addrspace(5)* %gep
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@@ -369,10 +369,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
-define void @load_private_lo_v2i16_reghi_vreg(i16* byval %in, i16 %reg) #0 {
+define void @load_private_lo_v2i16_reghi_vreg(i16 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
- %gep = getelementptr inbounds i16, i16* %in, i64 2045
- %load = load i16, i16* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
+ %load = load i16, i16 addrspace(5)* %gep
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -388,11 +388,11 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
-define void @load_private_lo_v2f16_reglo_vreg(half* byval %in, i32 %reg) #0 {
+define void @load_private_lo_v2f16_reglo_vreg(half addrspace(5)* byval %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
- %gep = getelementptr inbounds half, half* %in, i64 2045
- %load = load half, half* %gep
+ %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2045
+ %load = load half, half addrspace(5)* %gep
%build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
store <2 x half> %build1, <2 x half> addrspace(1)* undef
ret void
@@ -407,10 +407,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2i16_reglo_vreg_nooff(i16* %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@@ -425,10 +425,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2i16_reghi_vreg_nooff(i16* %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reghi_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@@ -443,10 +443,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2f16_reglo_vreg_nooff(half* %in, i32 %reg) #0 {
+define void @load_private_lo_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
- %load = load volatile half, half* inttoptr (i32 4094 to half*)
+ %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*)
%build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
store <2 x half> %build1, <2 x half> addrspace(1)* undef
ret void
@@ -461,11 +461,11 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
-define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8* byval %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %gep = getelementptr inbounds i8, i8* %in, i64 4091
- %load = load i8, i8* %gep
+ %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
+ %load = load i8, i8 addrspace(5)* %gep
%ext = zext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -481,11 +481,11 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
-define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8* byval %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %gep = getelementptr inbounds i8, i8* %in, i64 4091
- %load = load i8, i8* %gep
+ %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
+ %load = load i8, i8 addrspace(5)* %gep
%ext = sext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -501,10 +501,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8* %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = zext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -520,10 +520,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8* %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = sext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -539,10 +539,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
-define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8* %in, i32 %reg) #0 {
+define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
- %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
%ext = zext i8 %load to i16
%bc.ext = bitcast i16 %ext to half
%build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0
@@ -595,13 +595,13 @@ entry:
; VI: buffer_load_ushort v
define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i16], align 2
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i16], align 2, addrspace(5)
%reg.bc = bitcast i32 %reg to <2 x i16>
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i16], [4096 x i16]* %obj1, i32 0, i32 2025
- %load = load volatile i16, i16* %gep
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2025
+ %load = load volatile i16, i16 addrspace(5)* %gep
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@@ -614,13 +614,13 @@ entry:
; VI: buffer_load_sbyte v
define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i8], align 2
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i8], align 2, addrspace(5)
%reg.bc = bitcast i32 %reg to <2 x i16>
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
- %load = load volatile i8, i8* %gep
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
+ %load = load volatile i8, i8 addrspace(5)* %gep
%load.ext = sext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -634,13 +634,13 @@ entry:
; VI: buffer_load_ubyte v
define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i8], align 2
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i8], align 2, addrspace(5)
%reg.bc = bitcast i32 %reg to <2 x i16>
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
- %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
- %load = load volatile i8, i8* %gep
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
+ %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
+ %load = load volatile i8, i8 addrspace(5)* %gep
%load.ext = zext i8 %load to i16
%build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
Modified: llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-offset.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-offset.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-offset.ll Fri Feb 2 08:07:16 2018
@@ -13,22 +13,22 @@
; CHECK: buffer_load_dword
define amdgpu_gs float @main(float %v1, float %v2, i32 %idx1, i32 %idx2) {
main_body:
- %m1 = alloca [513 x float]
- %m2 = alloca [513 x float]
+ %m1 = alloca [513 x float], addrspace(5)
+ %m2 = alloca [513 x float], addrspace(5)
- %gep1.store = getelementptr [513 x float], [513 x float]* %m1, i32 0, i32 %idx1
- store float %v1, float* %gep1.store
+ %gep1.store = getelementptr [513 x float], [513 x float] addrspace(5)* %m1, i32 0, i32 %idx1
+ store float %v1, float addrspace(5)* %gep1.store
- %gep2.store = getelementptr [513 x float], [513 x float]* %m2, i32 0, i32 %idx2
- store float %v2, float* %gep2.store
+ %gep2.store = getelementptr [513 x float], [513 x float] addrspace(5)* %m2, i32 0, i32 %idx2
+ store float %v2, float addrspace(5)* %gep2.store
; This used to use a base reg equal to 0.
- %gep1.load = getelementptr [513 x float], [513 x float]* %m1, i32 0, i32 0
- %out1 = load float, float* %gep1.load
+ %gep1.load = getelementptr [513 x float], [513 x float] addrspace(5)* %m1, i32 0, i32 0
+ %out1 = load float, float addrspace(5)* %gep1.load
; This used to attempt to re-use the base reg at 0, generating an out-of-bounds instruction offset.
- %gep2.load = getelementptr [513 x float], [513 x float]* %m2, i32 0, i32 512
- %out2 = load float, float* %gep2.load
+ %gep2.load = getelementptr [513 x float], [513 x float] addrspace(5)* %m2, i32 0, i32 512
+ %out2 = load float, float addrspace(5)* %gep2.load
%r = fadd float %out1, %out2
ret float %r
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll Fri Feb 2 08:07:16 2018
@@ -7,10 +7,10 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_monotonic_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in monotonic monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic
ret void
}
@@ -20,10 +20,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acquire monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
ret void
}
@@ -33,10 +33,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in release monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic
ret void
}
@@ -46,10 +46,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acq_rel monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
ret void
}
@@ -59,10 +59,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
ret void
}
@@ -72,10 +72,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acquire acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
ret void
}
@@ -85,10 +85,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in release acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
ret void
}
@@ -98,10 +98,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acq_rel acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
ret void
}
@@ -111,10 +111,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
ret void
}
@@ -124,10 +124,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_seq_cst(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
ret void
}
@@ -137,10 +137,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_monotonic_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
ret void
}
@@ -150,10 +150,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
ret void
}
@@ -163,10 +163,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
ret void
}
@@ -176,10 +176,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
ret void
}
@@ -189,10 +189,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
ret void
}
@@ -202,10 +202,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
ret void
}
@@ -215,10 +215,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
ret void
}
@@ -228,10 +228,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
ret void
}
@@ -241,10 +241,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
ret void
}
@@ -254,10 +254,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
ret void
}
@@ -267,10 +267,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_monotonic_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
ret void
}
@@ -280,10 +280,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
ret void
}
@@ -293,10 +293,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
ret void
}
@@ -306,10 +306,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
ret void
}
@@ -319,10 +319,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
ret void
}
@@ -332,10 +332,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
ret void
}
@@ -345,10 +345,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") release acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
ret void
}
@@ -358,10 +358,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
ret void
}
@@ -371,10 +371,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
ret void
}
@@ -384,10 +384,10 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_seq_cst(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
ret void
}
@@ -397,10 +397,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_monotonic_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
ret void
}
@@ -410,10 +410,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
ret void
}
@@ -423,10 +423,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
ret void
}
@@ -436,10 +436,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
ret void
}
@@ -449,10 +449,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
ret void
}
@@ -462,10 +462,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
ret void
}
@@ -475,10 +475,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
ret void
}
@@ -488,10 +488,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
ret void
}
@@ -501,10 +501,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
ret void
}
@@ -514,10 +514,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_seq_cst(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
ret void
}
@@ -527,10 +527,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_monotonic_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
ret void
}
@@ -540,10 +540,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
ret void
}
@@ -553,10 +553,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
ret void
}
@@ -566,10 +566,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
ret void
}
@@ -579,10 +579,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_monotonic(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
ret void
}
@@ -592,10 +592,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
ret void
}
@@ -605,10 +605,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
ret void
}
@@ -618,10 +618,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
ret void
}
@@ -631,10 +631,10 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_acquire(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
ret void
}
@@ -644,9 +644,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_seq_cst(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll Fri Feb 2 08:07:16 2018
@@ -7,9 +7,9 @@
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_monotonic(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in monotonic
+ %val = atomicrmw volatile xchg i32* %out, i32 %in monotonic
ret void
}
@@ -19,9 +19,9 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in acquire
+ %val = atomicrmw volatile xchg i32* %out, i32 %in acquire
ret void
}
@@ -31,9 +31,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in release
+ %val = atomicrmw volatile xchg i32* %out, i32 %in release
ret void
}
@@ -43,9 +43,9 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in acq_rel
+ %val = atomicrmw volatile xchg i32* %out, i32 %in acq_rel
ret void
}
@@ -55,9 +55,9 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
ret void
}
@@ -67,9 +67,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_monotonic(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") monotonic
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") monotonic
ret void
}
@@ -79,9 +79,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") acquire
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acquire
ret void
}
@@ -91,9 +91,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") release
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") release
ret void
}
@@ -103,9 +103,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") acq_rel
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acq_rel
ret void
}
@@ -115,9 +115,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") seq_cst
ret void
}
@@ -127,9 +127,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_monotonic(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") monotonic
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") monotonic
ret void
}
@@ -139,9 +139,9 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") acquire
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acquire
ret void
}
@@ -151,9 +151,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") release
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") release
ret void
}
@@ -163,9 +163,9 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") acq_rel
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acq_rel
ret void
}
@@ -175,9 +175,9 @@ entry:
; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") seq_cst
ret void
}
@@ -187,9 +187,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_monotonic(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") monotonic
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") monotonic
ret void
}
@@ -199,9 +199,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") acquire
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acquire
ret void
}
@@ -211,9 +211,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") release
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") release
ret void
}
@@ -223,9 +223,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") acq_rel
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acq_rel
ret void
}
@@ -235,9 +235,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") seq_cst
ret void
}
@@ -247,9 +247,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_monotonic(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") monotonic
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") monotonic
ret void
}
@@ -259,9 +259,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") acquire
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acquire
ret void
}
@@ -271,9 +271,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") release
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") release
ret void
}
@@ -283,9 +283,9 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") acq_rel
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acq_rel
ret void
}
@@ -295,8 +295,8 @@ entry:
; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
; CHECK-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") seq_cst
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll Fri Feb 2 08:07:16 2018
@@ -8,36 +8,36 @@ entry:
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_load void (i32 addrspace(4)*, i32 addrspace(4)*): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_load void (i32*, i32*): Unsupported synchronization scope
define amdgpu_kernel void @invalid_load(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("invalid") seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("invalid") seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_store void (i32, i32 addrspace(4)*): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_store void (i32, i32*): Unsupported synchronization scope
define amdgpu_kernel void @invalid_store(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("invalid") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("invalid") seq_cst, align 4
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_cmpxchg void (i32 addrspace(4)*, i32, i32): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_cmpxchg void (i32*, i32, i32): Unsupported synchronization scope
define amdgpu_kernel void @invalid_cmpxchg(
- i32 addrspace(4)* %out, i32 %in, i32 %old) {
+ i32* %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
- %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("invalid") seq_cst seq_cst
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("invalid") seq_cst seq_cst
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_rmw void (i32 addrspace(4)*, i32): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_rmw void (i32*, i32): Unsupported synchronization scope
define amdgpu_kernel void @invalid_rmw(
- i32 addrspace(4)* %out, i32 %in) {
+ i32* %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("invalid") seq_cst
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("invalid") seq_cst
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll Fri Feb 2 08:07:16 2018
@@ -12,10 +12,10 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_unordered(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in unordered, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in unordered, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -26,10 +26,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_monotonic(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in monotonic, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in monotonic, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -40,10 +40,10 @@ entry:
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_acquire(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in acquire, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in acquire, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -54,10 +54,10 @@ entry:
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_seq_cst(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -68,10 +68,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_unordered(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") unordered, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -82,10 +82,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_monotonic(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") monotonic, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -96,10 +96,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_acquire(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") acquire, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -110,10 +110,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_seq_cst(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -124,10 +124,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_unordered(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") unordered, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("agent") unordered, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -138,10 +138,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_monotonic(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") monotonic, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -152,10 +152,10 @@ entry:
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_acquire(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") acquire, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("agent") acquire, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -166,10 +166,10 @@ entry:
; GCN-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_seq_cst(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -180,10 +180,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_unordered(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") unordered, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -194,10 +194,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_monotonic(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") monotonic, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -208,10 +208,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_acquire(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") acquire, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -222,10 +222,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_seq_cst(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -236,10 +236,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_unordered(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") unordered, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -250,10 +250,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_monotonic(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") monotonic, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -264,10 +264,10 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_acquire(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") acquire, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4
+ store i32 %val, i32* %out
ret void
}
@@ -278,42 +278,42 @@ entry:
; GCN-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_seq_cst(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") seq_cst, align 4
- store i32 %val, i32 addrspace(4)* %out
+ %val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_private_0
; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_0(
- i32* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(5)* %in, i32* %out) {
entry:
- %val = load i32, i32* %in, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ %val = load i32, i32 addrspace(5)* %in, align 4, !nontemporal !0
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_private_1
; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_1(
- i32* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(5)* %in, i32* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid
- %val = load i32, i32* %val.gep, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ %val.gep = getelementptr inbounds i32, i32 addrspace(5)* %in, i32 %tid
+ %val = load i32, i32 addrspace(5)* %val.gep, align 4, !nontemporal !0
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_global_0
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}}
define amdgpu_kernel void @nontemporal_global_0(
- i32 addrspace(1)* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(1)* %in, i32* %out) {
entry:
%val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ store i32 %val, i32* %out
ret void
}
@@ -321,56 +321,56 @@ entry:
; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_1(
- i32 addrspace(1)* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(1)* %in, i32* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %val.gep, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_local_0
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_0(
- i32 addrspace(3)* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(3)* %in, i32* %out) {
entry:
%val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_local_1
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_1(
- i32 addrspace(3)* %in, i32 addrspace(4)* %out) {
+ i32 addrspace(3)* %in, i32* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid
%val = load i32, i32 addrspace(3)* %val.gep, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_flat_0
; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_0(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load i32, i32 addrspace(4)* %in, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ %val = load i32, i32* %in, align 4, !nontemporal !0
+ store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}nontemporal_flat_1
; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_1(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val.gep = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tid
- %val = load i32, i32 addrspace(4)* %val.gep, align 4, !nontemporal !0
- store i32 %val, i32 addrspace(4)* %out
+ %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid
+ %val = load i32, i32* %val.gep, align 4, !nontemporal !0
+ store i32 %val, i32* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir Fri Feb 2 08:07:16 2018
@@ -3,27 +3,27 @@
--- |
; ModuleID = 'memory-legalizer-multiple-mem-operands.ll'
source_filename = "memory-legalizer-multiple-mem-operands.ll"
- target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
define amdgpu_kernel void @multiple_mem_operands(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) #0 {
entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
- %scratchptr01 = bitcast [8192 x i32]* %scratch0 to i32*
- store i32 1, i32* %scratchptr01
- %scratchptr12 = bitcast [8192 x i32]* %scratch1 to i32*
- store i32 2, i32* %scratchptr12
+ %scratch0 = alloca [8192 x i32], addrspace(5)
+ %scratch1 = alloca [8192 x i32], addrspace(5)
+ %scratchptr01 = bitcast [8192 x i32] addrspace(5)* %scratch0 to i32 addrspace(5)*
+ store i32 1, i32 addrspace(5)* %scratchptr01
+ %scratchptr12 = bitcast [8192 x i32] addrspace(5)* %scratch1 to i32 addrspace(5)*
+ store i32 2, i32 addrspace(5)* %scratchptr12
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
if: ; preds = %entry
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
- %if_value = load atomic i32, i32* %if_ptr syncscope("workgroup") seq_cst, align 4
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
+ %if_value = load atomic i32, i32 addrspace(5)* %if_ptr syncscope("workgroup") seq_cst, align 4
br label %done, !structurizecfg.uniform !0
else: ; preds = %entry
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
- %else_value = load atomic i32, i32* %else_ptr syncscope("agent") unordered, align 4
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
+ %else_value = load atomic i32, i32 addrspace(5)* %else_ptr syncscope("agent") unordered, align 4
br label %done, !structurizecfg.uniform !0
done: ; preds = %else, %if
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir Fri Feb 2 08:07:16 2018
@@ -3,27 +3,27 @@
--- |
; ModuleID = 'memory-legalizer-multiple-mem-operands.ll'
source_filename = "memory-legalizer-multiple-mem-operands.ll"
- target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
define amdgpu_kernel void @multiple_mem_operands(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) #0 {
entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
- %scratchptr01 = bitcast [8192 x i32]* %scratch0 to i32*
- store i32 1, i32* %scratchptr01
- %scratchptr12 = bitcast [8192 x i32]* %scratch1 to i32*
- store i32 2, i32* %scratchptr12
+ %scratch0 = alloca [8192 x i32], addrspace(5)
+ %scratch1 = alloca [8192 x i32], addrspace(5)
+ %scratchptr01 = bitcast [8192 x i32] addrspace(5)* %scratch0 to i32 addrspace(5)*
+ store i32 1, i32 addrspace(5)* %scratchptr01
+ %scratchptr12 = bitcast [8192 x i32] addrspace(5)* %scratch1 to i32 addrspace(5)*
+ store i32 2, i32 addrspace(5)* %scratchptr12
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
if: ; preds = %entry
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
- %if_value = load i32, i32* %if_ptr, align 4, !nontemporal !1
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
+ %if_value = load i32, i32 addrspace(5)* %if_ptr, align 4, !nontemporal !1
br label %done, !structurizecfg.uniform !0
else: ; preds = %entry
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
- %else_value = load i32, i32* %else_ptr, align 4, !nontemporal !1
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
+ %else_value = load i32, i32 addrspace(5)* %else_ptr, align 4, !nontemporal !1
br label %done, !structurizecfg.uniform !0
done: ; preds = %else, %if
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir Fri Feb 2 08:07:16 2018
@@ -3,27 +3,27 @@
--- |
; ModuleID = 'memory-legalizer-multiple-mem-operands.ll'
source_filename = "memory-legalizer-multiple-mem-operands.ll"
- target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
define amdgpu_kernel void @multiple_mem_operands(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) #0 {
entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
- %scratchptr01 = bitcast [8192 x i32]* %scratch0 to i32*
- store i32 1, i32* %scratchptr01
- %scratchptr12 = bitcast [8192 x i32]* %scratch1 to i32*
- store i32 2, i32* %scratchptr12
+ %scratch0 = alloca [8192 x i32], addrspace(5)
+ %scratch1 = alloca [8192 x i32], addrspace(5)
+ %scratchptr01 = bitcast [8192 x i32] addrspace(5)* %scratch0 to i32 addrspace(5)*
+ store i32 1, i32 addrspace(5)* %scratchptr01
+ %scratchptr12 = bitcast [8192 x i32] addrspace(5)* %scratch1 to i32 addrspace(5)*
+ store i32 2, i32 addrspace(5)* %scratchptr12
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
if: ; preds = %entry
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
- %if_value = load i32, i32* %if_ptr, align 4, !nontemporal !1
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
+ %if_value = load i32, i32 addrspace(5)* %if_ptr, align 4, !nontemporal !1
br label %done, !structurizecfg.uniform !0
else: ; preds = %entry
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
- %else_value = load i32, i32* %else_ptr, align 4
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
+ %else_value = load i32, i32 addrspace(5)* %else_ptr, align 4
br label %done, !structurizecfg.uniform !0
done: ; preds = %else, %if
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll Fri Feb 2 08:07:16 2018
@@ -9,9 +9,9 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_unordered(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out unordered, align 4
+ store atomic i32 %in, i32* %out unordered, align 4
ret void
}
@@ -19,9 +19,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_monotonic(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out monotonic, align 4
+ store atomic i32 %in, i32* %out monotonic, align 4
ret void
}
@@ -29,9 +29,9 @@ entry:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_release(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out release, align 4
+ store atomic i32 %in, i32* %out release, align 4
ret void
}
@@ -39,9 +39,9 @@ entry:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_seq_cst(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4
+ store atomic i32 %in, i32* %out seq_cst, align 4
ret void
}
@@ -49,9 +49,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_unordered(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4
ret void
}
@@ -59,9 +59,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_monotonic(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4
ret void
}
@@ -69,9 +69,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_release(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") release, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4
ret void
}
@@ -79,9 +79,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_seq_cst(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4
ret void
}
@@ -89,9 +89,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_unordered(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4
ret void
}
@@ -99,9 +99,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_monotonic(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4
ret void
}
@@ -109,9 +109,9 @@ entry:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_release(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") release, align 4
+ store atomic i32 %in, i32* %out syncscope("agent") release, align 4
ret void
}
@@ -119,9 +119,9 @@ entry:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_seq_cst(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4
ret void
}
@@ -129,9 +129,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_unordered(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4
ret void
}
@@ -139,9 +139,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_monotonic(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4
ret void
}
@@ -149,9 +149,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_release(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") release, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4
ret void
}
@@ -159,9 +159,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_seq_cst(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4
ret void
}
@@ -169,9 +169,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_unordered(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4
ret void
}
@@ -179,9 +179,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_monotonic(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4
ret void
}
@@ -189,9 +189,9 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_release(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") release, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4
ret void
}
@@ -199,31 +199,31 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_seq_cst(
- i32 %in, i32 addrspace(4)* %out) {
+ i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}nontemporal_private_0
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_0(
- i32 addrspace(4)* %in, i32* %out) {
+ i32* %in, i32 addrspace(5)* %out) {
entry:
- %val = load i32, i32 addrspace(4)* %in, align 4
- store i32 %val, i32* %out, !nontemporal !0
+ %val = load i32, i32* %in, align 4
+ store i32 %val, i32 addrspace(5)* %out, !nontemporal !0
ret void
}
; GCN-LABEL: {{^}}nontemporal_private_1
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_1(
- i32 addrspace(4)* %in, i32* %out) {
+ i32* %in, i32 addrspace(5)* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val = load i32, i32 addrspace(4)* %in, align 4
- %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid
- store i32 %val, i32* %out.gep, !nontemporal !0
+ %val = load i32, i32* %in, align 4
+ %out.gep = getelementptr inbounds i32, i32 addrspace(5)* %out, i32 %tid
+ store i32 %val, i32 addrspace(5)* %out.gep, !nontemporal !0
ret void
}
@@ -231,9 +231,9 @@ entry:
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_0(
- i32 addrspace(4)* %in, i32 addrspace(1)* %out) {
+ i32* %in, i32 addrspace(1)* %out) {
entry:
- %val = load i32, i32 addrspace(4)* %in, align 4
+ %val = load i32, i32* %in, align 4
store i32 %val, i32 addrspace(1)* %out, !nontemporal !0
ret void
}
@@ -242,10 +242,10 @@ entry:
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_1(
- i32 addrspace(4)* %in, i32 addrspace(1)* %out) {
+ i32* %in, i32 addrspace(1)* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val = load i32, i32 addrspace(4)* %in, align 4
+ %val = load i32, i32* %in, align 4
%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0
ret void
@@ -254,9 +254,9 @@ entry:
; GCN-LABEL: {{^}}nontemporal_local_0
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_0(
- i32 addrspace(4)* %in, i32 addrspace(3)* %out) {
+ i32* %in, i32 addrspace(3)* %out) {
entry:
- %val = load i32, i32 addrspace(4)* %in, align 4
+ %val = load i32, i32* %in, align 4
store i32 %val, i32 addrspace(3)* %out, !nontemporal !0
ret void
}
@@ -264,10 +264,10 @@ entry:
; GCN-LABEL: {{^}}nontemporal_local_1
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_1(
- i32 addrspace(4)* %in, i32 addrspace(3)* %out) {
+ i32* %in, i32 addrspace(3)* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val = load i32, i32 addrspace(4)* %in, align 4
+ %val = load i32, i32* %in, align 4
%out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid
store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0
ret void
@@ -276,22 +276,22 @@ entry:
; GCN-LABEL: {{^}}nontemporal_flat_0
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_0(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
- %val = load i32, i32 addrspace(4)* %in, align 4
- store i32 %val, i32 addrspace(4)* %out, !nontemporal !0
+ %val = load i32, i32* %in, align 4
+ store i32 %val, i32* %out, !nontemporal !0
ret void
}
; GCN-LABEL: {{^}}nontemporal_flat_1
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_1(
- i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+ i32* %in, i32* %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %val = load i32, i32 addrspace(4)* %in, align 4
- %out.gep = getelementptr inbounds i32, i32 addrspace(4)* %out, i32 %tid
- store i32 %val, i32 addrspace(4)* %out.gep, !nontemporal !0
+ %val = load i32, i32* %in, align 4
+ %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid
+ store i32 %val, i32* %out.gep, !nontemporal !0
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-worklist.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-worklist.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-worklist.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-worklist.ll Fri Feb 2 08:07:16 2018
@@ -13,7 +13,7 @@
; GCN-NEXT: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @in_worklist_once() #0 {
bb:
- %tmp = load i64, i64* undef
+ %tmp = load i64, i64 addrspace(5)* undef
br label %bb1
bb1: ; preds = %bb1, %bb
Modified: llvm/trunk/test/CodeGen/AMDGPU/mubuf-offset-private.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mubuf-offset-private.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mubuf-offset-private.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mubuf-offset-private.ll Fri Feb 2 08:07:16 2018
@@ -7,49 +7,49 @@
; GCN-LABEL: {{^}}store_private_offset_i8:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i8() #0 {
- store volatile i8 5, i8* inttoptr (i32 8 to i8*)
+ store volatile i8 5, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i16:
; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i16() #0 {
- store volatile i16 5, i16* inttoptr (i32 8 to i16*)
+ store volatile i16 5, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i32:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i32() #0 {
- store volatile i32 5, i32* inttoptr (i32 8 to i32*)
+ store volatile i32 5, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_v2i32:
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v2i32() #0 {
- store volatile <2 x i32> <i32 5, i32 10>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
+ store volatile <2 x i32> <i32 5, i32 10>, <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_v4i32:
; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v4i32() #0 {
- store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
+ store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i8() #0 {
- %load = load volatile i8, i8* inttoptr (i32 8 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}sextload_private_offset_i8:
; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @sextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
- %load = load volatile i8, i8* inttoptr (i32 8 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
%sextload = sext i8 %load to i32
store i32 %sextload, i32 addrspace(1)* undef
ret void
@@ -58,7 +58,7 @@ define amdgpu_kernel void @sextload_priv
; GCN-LABEL: {{^}}zextload_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @zextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
- %load = load volatile i8, i8* inttoptr (i32 8 to i8*)
+ %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
%zextload = zext i8 %load to i32
store i32 %zextload, i32 addrspace(1)* undef
ret void
@@ -67,14 +67,14 @@ define amdgpu_kernel void @zextload_priv
; GCN-LABEL: {{^}}load_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i16() #0 {
- %load = load volatile i16, i16* inttoptr (i32 8 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}sextload_private_offset_i16:
; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @sextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
- %load = load volatile i16, i16* inttoptr (i32 8 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
%sextload = sext i16 %load to i32
store i32 %sextload, i32 addrspace(1)* undef
ret void
@@ -83,7 +83,7 @@ define amdgpu_kernel void @sextload_priv
; GCN-LABEL: {{^}}zextload_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @zextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
- %load = load volatile i16, i16* inttoptr (i32 8 to i16*)
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
%zextload = zext i16 %load to i32
store i32 %zextload, i32 addrspace(1)* undef
ret void
@@ -92,28 +92,28 @@ define amdgpu_kernel void @zextload_priv
; GCN-LABEL: {{^}}load_private_offset_i32:
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i32() #0 {
- %load = load volatile i32, i32* inttoptr (i32 8 to i32*)
+ %load = load volatile i32, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_v2i32:
; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v2i32() #0 {
- %load = load volatile <2 x i32>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
+ %load = load volatile <2 x i32>, <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_v4i32:
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v4i32() #0 {
- %load = load volatile <4 x i32>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
+ %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:4095
define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
- store volatile i8 5, i8* inttoptr (i32 4095 to i8*)
+ store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4095 to i8 addrspace(5)*)
ret void
}
@@ -121,7 +121,7 @@ define amdgpu_kernel void @store_private
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
- store volatile i8 5, i8* inttoptr (i32 4096 to i8*)
+ store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4096 to i8 addrspace(5)*)
ret void
}
@@ -129,7 +129,7 @@ define amdgpu_kernel void @store_private
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen offset:1{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
- store volatile i8 5, i8* inttoptr (i32 4097 to i8*)
+ store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4097 to i8 addrspace(5)*)
ret void
}
@@ -144,11 +144,11 @@ define amdgpu_kernel void @store_private
; GFX9: v_add_u32_e32 [[ADDR:v[0-9]+]], 4,
; GFX9: buffer_store_dword v{{[0-9]+}}, [[ADDR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:32
define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 {
- %alloca = alloca [16 x i32], align 4
+ %alloca = alloca [16 x i32], align 4, addrspace(5)
%vaddr = load volatile i32, i32 addrspace(1)* undef
%vaddr.off = add i32 %vaddr, 8
- %gep = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %vaddr.off
- store volatile i32 9, i32* %gep
+ %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %vaddr.off
+ store volatile i32 9, i32 addrspace(5)* %gep
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/nested-calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/nested-calls.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/nested-calls.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/nested-calls.ll Fri Feb 2 08:07:16 2018
@@ -39,11 +39,11 @@ define void @test_func_call_external_voi
; GCN: s_sub_u32 s32, s32, 0x1200{{$}}
; GCN: s_setpc_b64
define void @test_func_call_external_void_func_i32_imm_stack_use() #0 {
- %alloca = alloca [16 x i32], align 4
- %gep0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 0
- %gep15 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 16
- store volatile i32 0, i32* %gep0
- store volatile i32 0, i32* %gep15
+ %alloca = alloca [16 x i32], align 4, addrspace(5)
+ %gep0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0
+ %gep15 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 16
+ store volatile i32 0, i32 addrspace(5)* %gep0
+ store volatile i32 0, i32 addrspace(5)* %gep15
call void @external_void_func_i32(i32 42)
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll Fri Feb 2 08:07:16 2018
@@ -13,44 +13,44 @@
define amdgpu_kernel void @_Z9chk1D_512v() #0 {
entry:
- %a0 = alloca i32, align 4
- %b0 = alloca i32, align 4
- %c0 = alloca i32, align 4
- %d0 = alloca i32, align 4
- %a1 = alloca i32, align 4
- %b1 = alloca i32, align 4
- %c1 = alloca i32, align 4
- %d1 = alloca i32, align 4
- %data = alloca i32, align 4
- %0 = load i32, i32* %a0, align 4
- %1 = load i32, i32* %b0, align 4
+ %a0 = alloca i32, align 4, addrspace(5)
+ %b0 = alloca i32, align 4, addrspace(5)
+ %c0 = alloca i32, align 4, addrspace(5)
+ %d0 = alloca i32, align 4, addrspace(5)
+ %a1 = alloca i32, align 4, addrspace(5)
+ %b1 = alloca i32, align 4, addrspace(5)
+ %c1 = alloca i32, align 4, addrspace(5)
+ %d1 = alloca i32, align 4, addrspace(5)
+ %data = alloca i32, align 4, addrspace(5)
+ %0 = load i32, i32 addrspace(5)* %a0, align 4
+ %1 = load i32, i32 addrspace(5)* %b0, align 4
%cmp = icmp ne i32 %0, %1
br i1 %cmp, label %land.lhs.true, label %if.end
land.lhs.true: ; preds = %entry
- %2 = load i32, i32* %c0, align 4
- %3 = load i32, i32* %d0, align 4
+ %2 = load i32, i32 addrspace(5)* %c0, align 4
+ %3 = load i32, i32 addrspace(5)* %d0, align 4
%cmp1 = icmp ne i32 %2, %3
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %land.lhs.true
- store i32 1, i32* %data, align 4
+ store i32 1, i32 addrspace(5)* %data, align 4
br label %if.end
if.end: ; preds = %if.then, %land.lhs.true, %entry
- %4 = load i32, i32* %a1, align 4
- %5 = load i32, i32* %b1, align 4
+ %4 = load i32, i32 addrspace(5)* %a1, align 4
+ %5 = load i32, i32 addrspace(5)* %b1, align 4
%cmp2 = icmp ne i32 %4, %5
br i1 %cmp2, label %land.lhs.true3, label %if.end6
land.lhs.true3: ; preds = %if.end
- %6 = load i32, i32* %c1, align 4
- %7 = load i32, i32* %d1, align 4
+ %6 = load i32, i32 addrspace(5)* %c1, align 4
+ %7 = load i32, i32 addrspace(5)* %d1, align 4
%cmp4 = icmp ne i32 %6, %7
br i1 %cmp4, label %if.then5, label %if.end6
if.then5: ; preds = %land.lhs.true3
- store i32 1, i32* %data, align 4
+ store i32 1, i32 addrspace(5)* %data, align 4
br label %if.end6
if.end6: ; preds = %if.then5, %land.lhs.true3, %if.end
Modified: llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll Fri Feb 2 08:07:16 2018
@@ -19,7 +19,7 @@
; OPTNONE-NOT: s_mov_b32
; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s5 offen{{$}}
define amdgpu_kernel void @store_to_undef() #0 {
- store volatile i32 0, i32* undef
+ store volatile i32 0, i32 addrspace(5)* undef
ret void
}
@@ -29,7 +29,7 @@ define amdgpu_kernel void @store_to_unde
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_store_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
define amdgpu_kernel void @store_to_inttoptr() #0 {
- store volatile i32 0, i32* inttoptr (i32 124 to i32*)
+ store volatile i32 0, i32 addrspace(5)* inttoptr (i32 124 to i32 addrspace(5)*)
ret void
}
@@ -39,7 +39,7 @@ define amdgpu_kernel void @store_to_intt
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
define amdgpu_kernel void @load_from_undef() #0 {
- %ld = load volatile i32, i32* undef
+ %ld = load volatile i32, i32 addrspace(5)* undef
ret void
}
@@ -49,7 +49,7 @@ define amdgpu_kernel void @load_from_und
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
define amdgpu_kernel void @load_from_inttoptr() #0 {
- %ld = load volatile i32, i32* inttoptr (i32 124 to i32*)
+ %ld = load volatile i32, i32 addrspace(5)* inttoptr (i32 124 to i32 addrspace(5)*)
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll Fri Feb 2 08:07:16 2018
@@ -43,13 +43,13 @@ entry:
%gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
%index.load = load i32, i32 addrspace(1)* %gep.index
%index = and i32 %index.load, 2
- %alloca = alloca [2 x <4 x i32>], align 16
- %gep0 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %alloca, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %alloca, i32 0, i32 1
- store <4 x i32> zeroinitializer, <4 x i32>* %gep0
- store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %gep1
- %gep2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %alloca, i32 0, i32 %index
- %load = load <4 x i32>, <4 x i32>* %gep2
+ %alloca = alloca [2 x <4 x i32>], align 16, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 1
+ store <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %gep0
+ store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 %index
+ %load = load <4 x i32>, <4 x i32> addrspace(5)* %gep2
store <4 x i32> %load, <4 x i32> addrspace(1)* %out
ret void
}
@@ -113,13 +113,13 @@ entry:
%gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
%index.load = load i32, i32 addrspace(1)* %gep.index
%index = and i32 %index.load, 2
- %alloca = alloca [2 x <8 x i32>], align 16
- %gep0 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>]* %alloca, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>]* %alloca, i32 0, i32 1
- store <8 x i32> zeroinitializer, <8 x i32>* %gep0
- store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32>* %gep1
- %gep2 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>]* %alloca, i32 0, i32 %index
- %load = load <8 x i32>, <8 x i32>* %gep2
+ %alloca = alloca [2 x <8 x i32>], align 16, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 1
+ store <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %gep0
+ store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32> addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 %index
+ %load = load <8 x i32>, <8 x i32> addrspace(5)* %gep2
store <8 x i32> %load, <8 x i32> addrspace(1)* %out
ret void
}
@@ -150,13 +150,13 @@ entry:
%gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
%index.load = load i32, i32 addrspace(1)* %gep.index
%index = and i32 %index.load, 2
- %alloca = alloca [2 x i64], align 16
- %gep0 = getelementptr inbounds [2 x i64], [2 x i64]* %alloca, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x i64], [2 x i64]* %alloca, i32 0, i32 1
- store i64 0, i64* %gep0
- store i64 34359738602, i64* %gep1
- %gep2 = getelementptr inbounds [2 x i64], [2 x i64]* %alloca, i32 0, i32 %index
- %load = load i64, i64* %gep2
+ %alloca = alloca [2 x i64], align 16, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 1
+ store i64 0, i64 addrspace(5)* %gep0
+ store i64 34359738602, i64 addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 %index
+ %load = load i64, i64 addrspace(5)* %gep2
store i64 %load, i64 addrspace(1)* %out
ret void
}
@@ -186,13 +186,13 @@ entry:
%gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
%index.load = load i32, i32 addrspace(1)* %gep.index
%index = and i32 %index.load, 2
- %alloca = alloca [2 x double], align 16
- %gep0 = getelementptr inbounds [2 x double], [2 x double]* %alloca, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x double], [2 x double]* %alloca, i32 0, i32 1
- store double 0.0, double* %gep0
- store double 4.0, double* %gep1
- %gep2 = getelementptr inbounds [2 x double], [2 x double]* %alloca, i32 0, i32 %index
- %load = load double, double* %gep2
+ %alloca = alloca [2 x double], align 16, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 1
+ store double 0.0, double addrspace(5)* %gep0
+ store double 4.0, double addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 %index
+ %load = load double, double addrspace(5)* %gep2
store double %load, double addrspace(1)* %out
ret void
}
@@ -235,13 +235,13 @@ entry:
%gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
%index.load = load i32, i32 addrspace(1)* %gep.index
%index = and i32 %index.load, 2
- %alloca = alloca [2 x <2 x i64>], align 16
- %gep0 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* %alloca, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* %alloca, i32 0, i32 1
- store <2 x i64> zeroinitializer, <2 x i64>* %gep0
- store <2 x i64> <i64 1, i64 2>, <2 x i64>* %gep1
- %gep2 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* %alloca, i32 0, i32 %index
- %load = load <2 x i64>, <2 x i64>* %gep2
+ %alloca = alloca [2 x <2 x i64>], align 16, addrspace(5)
+ %gep0 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 1
+ store <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %gep0
+ store <2 x i64> <i64 1, i64 2>, <2 x i64> addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 %index
+ %load = load <2 x i64>, <2 x i64> addrspace(5)* %gep2
store <2 x i64> %load, <2 x i64> addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/private-memory-atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-memory-atomics.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/private-memory-atomics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/private-memory-atomics.ll Fri Feb 2 08:07:16 2018
@@ -6,26 +6,26 @@
; Private atomics have no real use, but at least shouldn't crash on it.
define amdgpu_kernel void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %tmp3 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
- %tmp4 = atomicrmw add i32* %tmp3, i32 7 acq_rel
+ %tmp = alloca [2 x i32], addrspace(5)
+ %tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %tmp1
+ store i32 1, i32 addrspace(5)* %tmp2
+ %tmp3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
+ %tmp4 = atomicrmw add i32 addrspace(5)* %tmp3, i32 7 acq_rel
store i32 %tmp4, i32 addrspace(1)* %out
ret void
}
define amdgpu_kernel void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %tmp3 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
- %tmp4 = cmpxchg i32* %tmp3, i32 0, i32 1 acq_rel monotonic
+ %tmp = alloca [2 x i32], addrspace(5)
+ %tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %tmp1
+ store i32 1, i32 addrspace(5)* %tmp2
+ %tmp3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
+ %tmp4 = cmpxchg i32 addrspace(5)* %tmp3, i32 0, i32 1 acq_rel monotonic
%val = extractvalue { i32, i1 } %tmp4, 0
store i32 %val, i32 addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll Fri Feb 2 08:07:16 2018
@@ -7,20 +7,20 @@
; Make sure that AMDGPUPromoteAlloca doesn't crash if the called
; function is a constantexpr cast of a function.
-declare void @foo(float*) #0
+declare void @foo(float addrspace(5)*) #0
declare void @foo.varargs(...) #0
; XCHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo
define amdgpu_kernel void @crash_call_constexpr_cast() #0 {
- %alloca = alloca i32
- call void bitcast (void (float*)* @foo to void (i32*)*)(i32* %alloca) #0
+ %alloca = alloca i32, addrspace(5)
+ call void bitcast (void (float addrspace(5)*)* @foo to void (i32 addrspace(5)*)*)(i32 addrspace(5)* %alloca) #0
ret void
}
; XCHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo.varargs
define amdgpu_kernel void @crash_call_constexpr_cast_varargs() #0 {
- %alloca = alloca i32
- call void bitcast (void (...)* @foo.varargs to void (i32*)*)(i32* %alloca) #0
+ %alloca = alloca i32, addrspace(5)
+ call void bitcast (void (...)* @foo.varargs to void (i32 addrspace(5)*)*)(i32 addrspace(5)* %alloca) #0
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll Fri Feb 2 08:07:16 2018
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
+; RUN: opt -data-layout=A5 -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=ASM %s
; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
@@ -8,19 +8,19 @@
; ASM: ; ScratchSize: 24
define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %tmp2 = load i32, i32* %arrayidx4, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx4, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
- %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %tmp3 = load i32, i32* %arrayidx5
+ %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %tmp3 = load i32, i32 addrspace(5)* %arrayidx5
%arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp3, i32 addrspace(1)* %arrayidx6
ret void
@@ -35,13 +35,13 @@ entry:
; ASM: ; ScratchSize: 0
define void @promote_to_vector_call_c(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
- %tmp4 = load i32, i32* %tmp3
+ %tmp = alloca [2 x i32], addrspace(5)
+ %tmp1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %tmp1
+ store i32 1, i32 addrspace(5)* %tmp2
+ %tmp3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
+ %tmp4 = load i32, i32 addrspace(5)* %tmp3
%tmp5 = load volatile i32, i32 addrspace(1)* undef
%tmp6 = add i32 %tmp4, %tmp5
store i32 %tmp6, i32 addrspace(1)* %out
@@ -56,25 +56,25 @@ entry:
; ASM: ; ScratchSize: 24
define void @no_promote_to_lds_c(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13
ret void
}
-declare i32 @foo(i32*) #0
+declare i32 @foo(i32 addrspace(5)*) #0
; ASM-LABEL: {{^}}call_private:
; ASM: buffer_store_dword
@@ -83,13 +83,13 @@ declare i32 @foo(i32*) #0
; ASM: ScratchSize: 16396
define amdgpu_kernel void @call_private(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
- %val = call i32 @foo(i32* %tmp3)
+ %tmp = alloca [2 x i32], addrspace(5)
+ %tmp1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %tmp1
+ store i32 1, i32 addrspace(5)* %tmp2
+ %tmp3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
+ %val = call i32 @foo(i32 addrspace(5)* %tmp3)
store i32 %val, i32 addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-globals.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-globals.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-globals.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-globals.ll Fri Feb 2 08:07:16 2018
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
+; RUN: opt -data-layout=A5 -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s
@@ -12,19 +12,19 @@
define amdgpu_kernel void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
- %stack = alloca [10 x i32], align 4
+ %stack = alloca [10 x i32], align 4, addrspace(5)
%tmp = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 %tmp
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 %tmp
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 %tmp1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 0
- %tmp2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 1
- %tmp3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %tmp3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp3, i32 addrspace(1)* %arrayidx13
%v0 = getelementptr inbounds [750 x [10 x i32]], [750 x [10 x i32]] addrspace(3)* @global_array0, i32 0, i32 0, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll Fri Feb 2 08:07:16 2018
@@ -7,13 +7,13 @@
; OPTS: ds_write
define amdgpu_kernel void @promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
- %alloca = alloca [2 x [2 x i32]]
- %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i32, i32* %gep2
+ %alloca = alloca [2 x [2 x i32]], addrspace(5)
+ %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %gep0
+ store i32 1, i32 addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
+ %load = load i32, i32 addrspace(5)* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -23,13 +23,13 @@ entry:
; ALL-NOT ds_write
define amdgpu_kernel void @optnone_promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #1 {
entry:
- %alloca = alloca [2 x [2 x i32]]
- %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i32, i32* %gep2
+ %alloca = alloca [2 x [2 x i32]], addrspace(5)
+ %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %gep0
+ store i32 1, i32 addrspace(5)* %gep1
+ %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
+ %load = load i32, i32 addrspace(5)* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll Fri Feb 2 08:07:16 2018
@@ -32,19 +32,19 @@
; GCN: workgroup_group_segment_byte_size = 2340
define amdgpu_kernel void @promote_alloca_size_order_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %tmp2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %tmp3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %tmp3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp3, i32 addrspace(1)* %arrayidx13
@@ -64,19 +64,19 @@ entry:
; GCN: workgroup_group_segment_byte_size = 2352
define amdgpu_kernel void @promote_alloca_size_order_1(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %tmp2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %tmp3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %tmp3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp3, i32 addrspace(1)* %arrayidx13
@@ -102,19 +102,19 @@ entry:
; GCN: workgroup_group_segment_byte_size = 1060
define amdgpu_kernel void @promote_alloca_align_pad_guess_over_limit(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
- store i32 4, i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0
+ store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %tmp2 = load i32, i32* %arrayidx10, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1
+ store i32 5, i32 addrspace(5)* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %tmp3 = load i32, i32* %arrayidx12
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %tmp3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp3, i32 addrspace(1)* %arrayidx13
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll Fri Feb 2 08:07:16 2018
@@ -5,22 +5,22 @@
; GCN-LABEL: {{^}}stored_lds_pointer_value:
; GCN: buffer_store_dword v
-define amdgpu_kernel void @stored_lds_pointer_value(float* addrspace(1)* %ptr) #0 {
- %tmp = alloca float
- store float 0.0, float *%tmp
- store float* %tmp, float* addrspace(1)* %ptr
+define amdgpu_kernel void @stored_lds_pointer_value(float addrspace(5)* addrspace(1)* %ptr) #0 {
+ %tmp = alloca float, addrspace(5)
+ store float 0.0, float addrspace(5)*%tmp
+ store float addrspace(5)* %tmp, float addrspace(5)* addrspace(1)* %ptr
ret void
}
; GCN-LABEL: {{^}}stored_lds_pointer_value_offset:
; GCN: buffer_store_dword v
-define amdgpu_kernel void @stored_lds_pointer_value_offset(float* addrspace(1)* %ptr) #0 {
- %tmp0 = alloca float
- %tmp1 = alloca float
- store float 0.0, float *%tmp0
- store float 0.0, float *%tmp1
- store volatile float* %tmp0, float* addrspace(1)* %ptr
- store volatile float* %tmp1, float* addrspace(1)* %ptr
+define amdgpu_kernel void @stored_lds_pointer_value_offset(float addrspace(5)* addrspace(1)* %ptr) #0 {
+ %tmp0 = alloca float, addrspace(5)
+ %tmp1 = alloca float, addrspace(5)
+ store float 0.0, float addrspace(5)*%tmp0
+ store float 0.0, float addrspace(5)*%tmp1
+ store volatile float addrspace(5)* %tmp0, float addrspace(5)* addrspace(1)* %ptr
+ store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(1)* %ptr
ret void
}
@@ -29,12 +29,12 @@ define amdgpu_kernel void @stored_lds_po
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
; GCN: buffer_store_dword v
; GCN: buffer_store_dword v
-define amdgpu_kernel void @stored_lds_pointer_value_gep(float* addrspace(1)* %ptr, i32 %idx) #0 {
+define amdgpu_kernel void @stored_lds_pointer_value_gep(float addrspace(5)* addrspace(1)* %ptr, i32 %idx) #0 {
bb:
- %tmp = alloca float, i32 16
- store float 0.0, float* %tmp
- %tmp2 = getelementptr inbounds float, float* %tmp, i32 %idx
- store float* %tmp2, float* addrspace(1)* %ptr
+ %tmp = alloca float, i32 16, addrspace(5)
+ store float 0.0, float addrspace(5)* %tmp
+ %tmp2 = getelementptr inbounds float, float addrspace(5)* %tmp, i32 %idx
+ store float addrspace(5)* %tmp2, float addrspace(5)* addrspace(1)* %ptr
ret void
}
@@ -46,29 +46,29 @@ bb:
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
-define amdgpu_kernel void @stored_vector_pointer_value(i32* addrspace(1)* %out, i32 %index) {
+define amdgpu_kernel void @stored_vector_pointer_value(i32 addrspace(5)* addrspace(1)* %out, i32 %index) {
entry:
- %tmp0 = alloca [4 x i32]
- %x = getelementptr inbounds [4 x i32], [4 x i32]* %tmp0, i32 0, i32 0
- %y = getelementptr inbounds [4 x i32], [4 x i32]* %tmp0, i32 0, i32 1
- %z = getelementptr inbounds [4 x i32], [4 x i32]* %tmp0, i32 0, i32 2
- %w = getelementptr inbounds [4 x i32], [4 x i32]* %tmp0, i32 0, i32 3
- store i32 0, i32* %x
- store i32 1, i32* %y
- store i32 2, i32* %z
- store i32 3, i32* %w
- %tmp1 = getelementptr inbounds [4 x i32], [4 x i32]* %tmp0, i32 0, i32 %index
- store i32* %tmp1, i32* addrspace(1)* %out
+ %tmp0 = alloca [4 x i32], addrspace(5)
+ %x = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp0, i32 0, i32 0
+ %y = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp0, i32 0, i32 1
+ %z = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp0, i32 0, i32 2
+ %w = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp0, i32 0, i32 3
+ store i32 0, i32 addrspace(5)* %x
+ store i32 1, i32 addrspace(5)* %y
+ store i32 2, i32 addrspace(5)* %z
+ store i32 3, i32 addrspace(5)* %w
+ %tmp1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp0, i32 0, i32 %index
+ store i32 addrspace(5)* %tmp1, i32 addrspace(5)* addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}stored_fi_to_self:
; GCN-NOT: ds_
define amdgpu_kernel void @stored_fi_to_self() #0 {
- %tmp = alloca i32*
- store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp
- %bitcast = bitcast i32** %tmp to i32*
- store volatile i32* %bitcast, i32** %tmp
+ %tmp = alloca i32 addrspace(5)*, addrspace(5)
+ store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp
+ %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp to i32 addrspace(5)*
+ store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600.alu-limits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600.alu-limits.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600.alu-limits.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600.alu-limits.ll Fri Feb 2 08:07:16 2018
@@ -6,10 +6,10 @@
%struct.foo = type {i32, i32, i32}
-define amdgpu_kernel void @alu_limits(i32 addrspace(1)* %out, %struct.foo* %in, i32 %offset) {
+define amdgpu_kernel void @alu_limits(i32 addrspace(1)* %out, %struct.foo addrspace(5)* %in, i32 %offset) {
entry:
- %ptr = getelementptr inbounds %struct.foo, %struct.foo* %in, i32 1, i32 2
- %x = load i32, i32 *%ptr, align 4
+ %ptr = getelementptr inbounds %struct.foo, %struct.foo addrspace(5)* %in, i32 1, i32 2
+ %x = load i32, i32 addrspace(5)*%ptr, align 4
br label %loop
loop:
%i = phi i32 [ 100, %entry ], [ %nexti, %loop ]
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600.private-memory.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600.private-memory.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600.private-memory.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600.private-memory.ll Fri Feb 2 08:07:16 2018
@@ -12,13 +12,13 @@ declare i32 @llvm.r600.read.tidig.x() no
define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
- %0 = alloca [2 x i32]
- %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1
- store i32 0, i32* %1
- store i32 1, i32* %2
- %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in
- %4 = load i32, i32* %3
+ %0 = alloca [2 x i32], addrspace(5)
+ %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
+ %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1
+ store i32 0, i32 addrspace(5)* %1
+ store i32 1, i32 addrspace(5)* %2
+ %3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in
+ %4 = load i32, i32 addrspace(5)* %3
%5 = call i32 @llvm.r600.read.tidig.x()
%6 = add i32 %4, %5
store i32 %6, i32 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/sad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sad.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sad.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sad.ll Fri Feb 2 08:07:16 2018
@@ -59,7 +59,7 @@ define amdgpu_kernel void @v_sad_u32_mul
%t1 = select i1 %icmp1, i32 %a, i32 %b
%ret0 = sub i32 %t0, %t1
- store volatile i32 %ret0, i32 *undef
+ store volatile i32 %ret0, i32 addrspace(5)*undef
%ret = add i32 %ret0, %c
store i32 %ret, i32 addrspace(1)* %out
@@ -77,7 +77,7 @@ define amdgpu_kernel void @v_sad_u32_mul
%ret0 = sub i32 %t0, %t1
%ret = add i32 %ret0, %c
- store volatile i32 %ret, i32 *undef
+ store volatile i32 %ret, i32 addrspace(5)*undef
store i32 %ret, i32 addrspace(1)* %out
ret void
}
@@ -87,7 +87,7 @@ define amdgpu_kernel void @v_sad_u32_mul
define amdgpu_kernel void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%t0 = select i1 %icmp0, i32 %a, i32 %b
- store volatile i32 %t0, i32 *undef
+ store volatile i32 %t0, i32 addrspace(5)*undef
%icmp1 = icmp ule i32 %a, %b
%t1 = select i1 %icmp1, i32 %a, i32 %b
@@ -108,7 +108,7 @@ define amdgpu_kernel void @v_sad_u32_mul
%icmp1 = icmp ule i32 %a, %b
%t1 = select i1 %icmp1, i32 %a, i32 %b
- store volatile i32 %t1, i32 *undef
+ store volatile i32 %t1, i32 addrspace(5)*undef
%ret0 = sub i32 %t0, %t1
%ret = add i32 %ret0, %c
@@ -122,7 +122,7 @@ define amdgpu_kernel void @v_sad_u32_mul
define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%sub0 = sub i32 %a, %b
- store volatile i32 %sub0, i32 *undef
+ store volatile i32 %sub0, i32 addrspace(5)*undef
%sub1 = sub i32 %b, %a
%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
@@ -141,7 +141,7 @@ define amdgpu_kernel void @v_sad_u32_mul
%sub0 = sub i32 %a, %b
%sub1 = sub i32 %b, %a
%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
- store volatile i32 %ret0, i32 *undef
+ store volatile i32 %ret0, i32 addrspace(5)*undef
%ret = add i32 %ret0, %c
Modified: llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll Fri Feb 2 08:07:16 2018
@@ -15,26 +15,26 @@
define amdgpu_kernel void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
+ %scratch0 = alloca [8192 x i32], addrspace(5)
+ %scratch1 = alloca [8192 x i32], addrspace(5)
- %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 0
- store i32 1, i32* %scratchptr0
+ %scratchptr0 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 0
+ store i32 1, i32 addrspace(5)* %scratchptr0
- %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 0
- store i32 2, i32* %scratchptr1
+ %scratchptr1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 0
+ store i32 2, i32 addrspace(5)* %scratchptr1
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else
if:
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
- %if_value = load i32, i32* %if_ptr
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32, i32 addrspace(5)* %if_ptr
br label %done
else:
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
- %else_value = load i32, i32* %else_ptr
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32, i32 addrspace(5)* %else_ptr
br label %done
done:
@@ -55,29 +55,29 @@ done:
define amdgpu_kernel void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
+ %scratch0 = alloca [8192 x i32], addrspace(5)
+ %scratch1 = alloca [8192 x i32], addrspace(5)
%offset0 = load i32, i32 addrspace(1)* %offsets
- %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %offset0
- store i32 %offset0, i32* %scratchptr0
+ %scratchptr0 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %offset0
+ store i32 %offset0, i32 addrspace(5)* %scratchptr0
%offsetptr1 = getelementptr i32, i32 addrspace(1)* %offsets, i32 1
%offset1 = load i32, i32 addrspace(1)* %offsetptr1
- %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %offset1
- store i32 %offset1, i32* %scratchptr1
+ %scratchptr1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %offset1
+ store i32 %offset1, i32 addrspace(5)* %scratchptr1
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else
if:
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
- %if_value = load i32, i32* %if_ptr
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32, i32 addrspace(5)* %if_ptr
br label %done
else:
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
- %else_value = load i32, i32* %else_ptr
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32, i32 addrspace(5)* %else_ptr
br label %done
done:
@@ -91,10 +91,10 @@ done:
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
define amdgpu_kernel void @neg_vaddr_offset_inbounds(i32 %offset) {
entry:
- %array = alloca [8192 x i32]
+ %array = alloca [8192 x i32], addrspace(5)
%ptr_offset = add i32 %offset, 4
- %ptr = getelementptr inbounds [8192 x i32], [8192 x i32]* %array, i32 0, i32 %ptr_offset
- store i32 0, i32* %ptr
+ %ptr = getelementptr inbounds [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %ptr_offset
+ store i32 0, i32 addrspace(5)* %ptr
ret void
}
@@ -103,10 +103,10 @@ entry:
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
define amdgpu_kernel void @neg_vaddr_offset(i32 %offset) {
entry:
- %array = alloca [8192 x i32]
+ %array = alloca [8192 x i32], addrspace(5)
%ptr_offset = add i32 %offset, 4
- %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %ptr_offset
- store i32 0, i32* %ptr
+ %ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %ptr_offset
+ store i32 0, i32 addrspace(5)* %ptr
ret void
}
@@ -114,11 +114,11 @@ entry:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:20
define amdgpu_kernel void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
entry:
- %array = alloca [8192 x i32]
- %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 4
- store i32 0, i32* %ptr
- %load_ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %offset
- %val = load i32, i32* %load_ptr
+ %array = alloca [8192 x i32], addrspace(5)
+ %ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 4
+ store i32 0, i32 addrspace(5)* %ptr
+ %load_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %offset
+ %val = load i32, i32 addrspace(5)* %load_ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll Fri Feb 2 08:07:16 2018
@@ -342,10 +342,10 @@ define void @shl_add_ptr_combine_2use_pr
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 2
%shl1 = shl i32 %idx.add, 3
- %ptr0 = inttoptr i32 %shl0 to i32*
- %ptr1 = inttoptr i32 %shl1 to i32*
- store volatile i32 9, i32* %ptr0
- store volatile i32 10, i32* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
+ %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
+ store volatile i32 9, i32 addrspace(5)* %ptr0
+ store volatile i32 10, i32 addrspace(5)* %ptr1
ret void
}
@@ -360,10 +360,10 @@ define void @shl_add_ptr_combine_2use_ma
%idx.add = add nuw i32 %idx, 511
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32*
- %ptr1 = inttoptr i32 %shl1 to i32*
- store volatile i32 9, i32* %ptr0
- store volatile i32 10, i32* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
+ %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
+ store volatile i32 9, i32 addrspace(5)* %ptr0
+ store volatile i32 10, i32 addrspace(5)* %ptr1
ret void
}
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_private_offset:
@@ -377,10 +377,10 @@ define void @shl_add_ptr_combine_2use_bo
%idx.add = add nuw i32 %idx, 256
%shl0 = shl i32 %idx.add, 4
%shl1 = shl i32 %idx.add, 5
- %ptr0 = inttoptr i32 %shl0 to i32*
- %ptr1 = inttoptr i32 %shl1 to i32*
- store volatile i32 9, i32* %ptr0
- store volatile i32 10, i32* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
+ %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
+ store volatile i32 9, i32 addrspace(5)* %ptr0
+ store volatile i32 10, i32 addrspace(5)* %ptr1
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir Fri Feb 2 08:07:16 2018
@@ -8,12 +8,12 @@
# CHECK-LABEL: name: expecting_non_empty_interval
# CHECK: undef %7.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %7.sub1, implicit $exec
-# CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 8 into %stack.0, align 4)
+# CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
# CHECK-NEXT: undef %5.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
# CHECK-NEXT: dead %2:vgpr_32 = V_MUL_F32_e32 0, %5.sub1, implicit $exec
# CHECK: S_NOP 0, implicit %6.sub1
-# CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 8 from %stack.0, align 4)
+# CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: S_NOP 0, implicit %8.sub1
# CHECK-NEXT: S_NOP 0, implicit undef %9.sub0
Modified: llvm/trunk/test/CodeGen/AMDGPU/stack-size-overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/stack-size-overflow.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/stack-size-overflow.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/stack-size-overflow.ll Fri Feb 2 08:07:16 2018
@@ -1,14 +1,14 @@
; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck -check-prefix=ERROR %s
; RUN: not llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #1
+declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture, i8, i32, i32, i1) #1
; ERROR: error: stack size limit exceeded (4294967296) in stack_size_limit
; GCN: ; ScratchSize: 4294967296
define amdgpu_kernel void @stack_size_limit() #0 {
entry:
- %alloca = alloca [1073741823 x i32], align 4
- %bc = bitcast [1073741823 x i32]* %alloca to i8*
- call void @llvm.memset.p0i8.i32(i8* %bc, i8 9, i32 1073741823, i1 true)
+ %alloca = alloca [1073741823 x i32], align 4, addrspace(5)
+ %bc = bitcast [1073741823 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
+ call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %bc, i8 9, i32 1073741823, i32 1, i1 true)
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir Fri Feb 2 08:07:16 2018
@@ -9,11 +9,11 @@
# CHECK: - { id: 1, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
# CHECK-NEXT: stack-id: 1,
-# CHECK: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0)
-# CHECK: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0)
+# CHECK: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+# CHECK: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
-# CHECK: SI_SPILL_S32_SAVE killed renamable $sgpr6, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5, implicit-def dead $m0 :: (store 4 into %stack.1)
-# CHECK: $sgpr6 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5, implicit-def dead $m0 :: (load 4 from %stack.1)
+# CHECK: SI_SPILL_S32_SAVE killed renamable $sgpr6, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5, implicit-def dead $m0 :: (store 4 into %stack.1, addrspace 5)
+# CHECK: $sgpr6 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5, implicit-def dead $m0 :: (load 4 from %stack.1, addrspace 5)
name: no_merge_sgpr_vgpr_spill_slot
tracksRegLiveness: true
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-hi16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-hi16.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-hi16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-hi16.ll Fri Feb 2 08:07:16 2018
@@ -187,11 +187,11 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16(i16 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16(i16* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- store i16 %hi, i16 addrspace(4)* %out
+ store i16 %hi, i16* %out
ret void
}
@@ -205,11 +205,11 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2f16(half addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2f16(half* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x half>
%hi = extractelement <2 x half> %value, i32 1
- store half %hi, half addrspace(4)* %out
+ store half %hi, half* %out
ret void
}
@@ -223,11 +223,11 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_i32_shift(i16 addrspace(4)* %out, i32 %value) #0 {
+define void @store_flat_hi_i32_shift(i16* %out, i32 %value) #0 {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i16
- store i16 %hi, i16 addrspace(4)* %out
+ store i16 %hi, i16* %out
ret void
}
@@ -241,12 +241,12 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_i8(i8 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
%trunc = trunc i16 %hi to i8
- store i8 %trunc, i8 addrspace(4)* %out
+ store i8 %trunc, i8* %out
ret void
}
@@ -260,11 +260,11 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_i8_shift(i8 addrspace(4)* %out, i32 %value) #0 {
+define void @store_flat_hi_i8_shift(i8* %out, i32 %value) #0 {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i8
- store i8 %hi, i8 addrspace(4)* %out
+ store i8 %hi, i8* %out
ret void
}
@@ -278,12 +278,12 @@ entry:
; VI: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_max_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- %gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 2047
- store i16 %hi, i16 addrspace(4)* %gep
+ %gep = getelementptr inbounds i16, i16* %out, i64 2047
+ store i16 %hi, i16* %gep
ret void
}
@@ -297,12 +297,12 @@ entry:
; VI: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_neg_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_neg_offset(i16* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- %gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 -1023
- store i16 %hi, i16 addrspace(4)* %gep
+ %gep = getelementptr inbounds i16, i16* %out, i64 -1023
+ store i16 %hi, i16* %gep
ret void
}
@@ -316,13 +316,13 @@ entry:
; VI: flat_store_byte v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_i8_max_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_i8_max_offset(i8* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
%trunc = trunc i16 %hi to i8
- %gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 4095
- store i8 %trunc, i8 addrspace(4)* %gep
+ %gep = getelementptr inbounds i8, i8* %out, i64 4095
+ store i8 %trunc, i8* %gep
ret void
}
@@ -337,13 +337,13 @@ entry:
; VI: flat_store_byte v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_i8_neg_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_i8_neg_offset(i8* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
%trunc = trunc i16 %hi to i8
- %gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 -4095
- store i8 %trunc, i8 addrspace(4)* %gep
+ %gep = getelementptr inbounds i8, i8* %out, i64 -4095
+ store i8 %trunc, i8* %gep
ret void
}
@@ -357,12 +357,12 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2i16(i16* %out, i32 %arg) #0 {
+define void @store_private_hi_v2i16(i16 addrspace(5)* %out, i32 %arg) #0 {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- store i16 %hi, i16* %out
+ store i16 %hi, i16 addrspace(5)* %out
ret void
}
@@ -376,12 +376,12 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2f16(half* %out, i32 %arg) #0 {
+define void @store_private_hi_v2f16(half addrspace(5)* %out, i32 %arg) #0 {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x half>
%hi = extractelement <2 x half> %value, i32 1
- store half %hi, half* %out
+ store half %hi, half addrspace(5)* %out
ret void
}
@@ -395,11 +395,11 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_i32_shift(i16* %out, i32 %value) #0 {
+define void @store_private_hi_i32_shift(i16 addrspace(5)* %out, i32 %value) #0 {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i16
- store i16 %hi, i16* %out
+ store i16 %hi, i16 addrspace(5)* %out
ret void
}
@@ -413,12 +413,12 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
+define void @store_private_hi_v2i16_i8(i8 addrspace(5)* %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
%trunc = trunc i16 %hi to i8
- store i8 %trunc, i8* %out
+ store i8 %trunc, i8 addrspace(5)* %out
ret void
}
@@ -432,11 +432,11 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_i8_shift(i8* %out, i32 %value) #0 {
+define void @store_private_hi_i8_shift(i8 addrspace(5)* %out, i32 %value) #0 {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i8
- store i8 %hi, i8* %out
+ store i8 %hi, i8 addrspace(5)* %out
ret void
}
@@ -449,12 +449,12 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2i16_max_offset(i16* byval %out, i32 %arg) #0 {
+define void @store_private_hi_v2i16_max_offset(i16 addrspace(5)* byval %out, i32 %arg) #0 {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- %gep = getelementptr inbounds i16, i16* %out, i64 2045
- store i16 %hi, i16* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(5)* %out, i64 2045
+ store i16 %hi, i16 addrspace(5)* %gep
ret void
}
@@ -475,7 +475,7 @@ entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- store volatile i16 %hi, i16* null
+ store volatile i16 %hi, i16 addrspace(5)* null
ret void
}
@@ -495,7 +495,7 @@ entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
%trunc = trunc i16 %hi to i8
- store volatile i8 %trunc, i8* null
+ store volatile i8 %trunc, i8 addrspace(5)* null
ret void
}
@@ -599,14 +599,14 @@ entry:
; GFX9-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094
define void @store_private_hi_v2i16_to_offset(i32 %arg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i16], align 2
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i16], align 2, addrspace(5)
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- %gep = getelementptr inbounds [4096 x i16], [4096 x i16]* %obj1, i32 0, i32 2025
- store i16 %hi, i16* %gep
+ %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2025
+ store i16 %hi, i16 addrspace(5)* %gep
ret void
}
@@ -616,15 +616,15 @@ entry:
; GFX9-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s5 offset:4095
define void @store_private_hi_v2i16_i8_to_offset(i32 %arg) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4
- %obj1 = alloca [4096 x i8], align 2
- %bc = bitcast [10 x i32]* %obj0 to i32*
- store volatile i32 123, i32* %bc
+ %obj0 = alloca [10 x i32], align 4, addrspace(5)
+ %obj1 = alloca [4096 x i8], align 2, addrspace(5)
+ %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
+ store volatile i32 123, i32 addrspace(5)* %bc
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
- %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
+ %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
%trunc = trunc i16 %hi to i8
- store i8 %trunc, i8* %gep
+ store i8 %trunc, i8 addrspace(5)* %gep
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-private.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-private.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-private.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-private.ll Fri Feb 2 08:07:16 2018
@@ -15,9 +15,9 @@
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_byte
-define amdgpu_kernel void @store_i1(i1 addrspace(0)* %out) {
+define amdgpu_kernel void @store_i1(i1 addrspace(5)* %out) {
entry:
- store i1 true, i1 addrspace(0)* %out
+ store i1 true, i1 addrspace(5)* %out
ret void
}
@@ -44,9 +44,9 @@ entry:
; SI: buffer_store_byte
-define amdgpu_kernel void @store_i8(i8 addrspace(0)* %out, i8 %in) {
+define amdgpu_kernel void @store_i8(i8 addrspace(5)* %out, i8 %in) {
entry:
- store i8 %in, i8 addrspace(0)* %out
+ store i8 %in, i8 addrspace(5)* %out
ret void
}
@@ -72,9 +72,9 @@ entry:
; EG: MOV * T(0 + AR.x).X+, [[RES]]
; SI: buffer_store_short
-define amdgpu_kernel void @store_i16(i16 addrspace(0)* %out, i16 %in) {
+define amdgpu_kernel void @store_i16(i16 addrspace(5)* %out, i16 %in) {
entry:
- store i16 %in, i16 addrspace(0)* %out
+ store i16 %in, i16 addrspace(5)* %out
ret void
}
@@ -102,9 +102,9 @@ entry:
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
-define amdgpu_kernel void @store_i24(i24 addrspace(0)* %out, i24 %in) {
+define amdgpu_kernel void @store_i24(i24 addrspace(5)* %out, i24 %in) {
entry:
- store i24 %in, i24 addrspace(0)* %out
+ store i24 %in, i24 addrspace(5)* %out
ret void
}
@@ -120,9 +120,9 @@ entry:
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM-NOT: MOVA_INT
-define amdgpu_kernel void @store_i25(i25 addrspace(0)* %out, i25 %in) {
+define amdgpu_kernel void @store_i25(i25 addrspace(5)* %out, i25 %in) {
entry:
- store i25 %in, i25 addrspace(0)* %out
+ store i25 %in, i25 addrspace(5)* %out
ret void
}
@@ -141,10 +141,10 @@ entry:
; CM-NOT: MOVA_INT
; SI: buffer_store_short
-define amdgpu_kernel void @store_v2i8(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i8(<2 x i8> addrspace(5)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
- store <2 x i8> %0, <2 x i8> addrspace(0)* %out
+ store <2 x i8> %0, <2 x i8> addrspace(5)* %out
ret void
}
@@ -172,10 +172,10 @@ entry:
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_byte
-define amdgpu_kernel void @store_v2i8_unaligned(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i8_unaligned(<2 x i8> addrspace(5)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
- store <2 x i8> %0, <2 x i8> addrspace(0)* %out, align 1
+ store <2 x i8> %0, <2 x i8> addrspace(5)* %out, align 1
ret void
}
@@ -191,10 +191,10 @@ entry:
; CM-NOT: MOVA_INT
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v2i16(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i16(<2 x i16> addrspace(5)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
- store <2 x i16> %0, <2 x i16> addrspace(0)* %out
+ store <2 x i16> %0, <2 x i16> addrspace(5)* %out
ret void
}
@@ -223,10 +223,10 @@ entry:
; SI: buffer_store_short
; SI: buffer_store_short
-define amdgpu_kernel void @store_v2i16_unaligned(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i16_unaligned(<2 x i16> addrspace(5)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
- store <2 x i16> %0, <2 x i16> addrspace(0)* %out, align 2
+ store <2 x i16> %0, <2 x i16> addrspace(5)* %out, align 2
ret void
}
@@ -240,10 +240,10 @@ entry:
; CM-NOT: MOVA_INT
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v4i8(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8(<4 x i8> addrspace(5)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
- store <4 x i8> %0, <4 x i8> addrspace(0)* %out
+ store <4 x i8> %0, <4 x i8> addrspace(5)* %out
ret void
}
@@ -299,10 +299,10 @@ entry:
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
-define amdgpu_kernel void @store_v4i8_unaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8_unaligned(<4 x i8> addrspace(5)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
- store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 1
+ store <4 x i8> %0, <4 x i8> addrspace(5)* %out, align 1
ret void
}
@@ -410,10 +410,10 @@ entry:
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
-define amdgpu_kernel void @store_v8i8_unaligned(<8 x i8> addrspace(0)* %out, <8 x i32> %in) {
+define amdgpu_kernel void @store_v8i8_unaligned(<8 x i8> addrspace(5)* %out, <8 x i32> %in) {
entry:
%0 = trunc <8 x i32> %in to <8 x i8>
- store <8 x i8> %0, <8 x i8> addrspace(0)* %out, align 1
+ store <8 x i8> %0, <8 x i8> addrspace(5)* %out, align 1
ret void
}
@@ -443,10 +443,10 @@ entry:
; SI: buffer_store_short
; SI: buffer_store_short
; SI-NOT: buffer_store_dword
-define amdgpu_kernel void @store_v4i8_halfaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8_halfaligned(<4 x i8> addrspace(5)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
- store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 2
+ store <4 x i8> %0, <4 x i8> addrspace(5)* %out, align 2
ret void
}
@@ -460,8 +460,8 @@ entry:
; SI: buffer_store_dword
-define amdgpu_kernel void @store_f32(float addrspace(0)* %out, float %in) {
- store float %in, float addrspace(0)* %out
+define amdgpu_kernel void @store_f32(float addrspace(5)* %out, float %in) {
+ store float %in, float addrspace(5)* %out
ret void
}
@@ -480,10 +480,10 @@ define amdgpu_kernel void @store_f32(flo
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v4i16(<4 x i16> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i16(<4 x i16> addrspace(5)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i16>
- store <4 x i16> %0, <4 x i16> addrspace(0)* %out
+ store <4 x i16> %0, <4 x i16> addrspace(5)* %out
ret void
}
@@ -504,11 +504,11 @@ entry:
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v2f32(<2 x float> addrspace(0)* %out, float %a, float %b) {
+define amdgpu_kernel void @store_v2f32(<2 x float> addrspace(5)* %out, float %a, float %b) {
entry:
%0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
%1 = insertelement <2 x float> %0, float %b, i32 1
- store <2 x float> %1, <2 x float> addrspace(0)* %out
+ store <2 x float> %1, <2 x float> addrspace(5)* %out
ret void
}
@@ -533,8 +533,8 @@ entry:
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(0)* %out, <3 x i32> %a) nounwind {
- store <3 x i32> %a, <3 x i32> addrspace(0)* %out, align 16
+define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(5)* %out, <3 x i32> %a) nounwind {
+ store <3 x i32> %a, <3 x i32> addrspace(5)* %out, align 16
ret void
}
@@ -563,9 +563,9 @@ define amdgpu_kernel void @store_v3i32(<
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v4i32(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i32(<4 x i32> addrspace(5)* %out, <4 x i32> %in) {
entry:
- store <4 x i32> %in, <4 x i32> addrspace(0)* %out
+ store <4 x i32> %in, <4 x i32> addrspace(5)* %out
ret void
}
@@ -594,9 +594,9 @@ entry:
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v4i32_unaligned(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i32_unaligned(<4 x i32> addrspace(5)* %out, <4 x i32> %in) {
entry:
- store <4 x i32> %in, <4 x i32> addrspace(0)* %out, align 4
+ store <4 x i32> %in, <4 x i32> addrspace(5)* %out, align 4
ret void
}
@@ -626,9 +626,9 @@ entry:
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @store_v4f32(<4 x float> addrspace(0)* %out, <4 x float> addrspace(0)* %in) {
- %1 = load <4 x float>, <4 x float> addrspace(0) * %in
- store <4 x float> %1, <4 x float> addrspace(0)* %out
+define amdgpu_kernel void @store_v4f32(<4 x float> addrspace(5)* %out, <4 x float> addrspace(5)* %in) {
+ %1 = load <4 x float>, <4 x float> addrspace(5)* %in
+ store <4 x float> %1, <4 x float> addrspace(5)* %out
ret void
}
@@ -644,10 +644,10 @@ define amdgpu_kernel void @store_v4f32(<
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_byte
-define amdgpu_kernel void @store_i64_i8(i8 addrspace(0)* %out, i64 %in) {
+define amdgpu_kernel void @store_i64_i8(i8 addrspace(5)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i8
- store i8 %0, i8 addrspace(0)* %out
+ store i8 %0, i8 addrspace(5)* %out
ret void
}
@@ -663,10 +663,10 @@ entry:
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_short
-define amdgpu_kernel void @store_i64_i16(i16 addrspace(0)* %out, i64 %in) {
+define amdgpu_kernel void @store_i64_i16(i16 addrspace(5)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i16
- store i16 %0, i16 addrspace(0)* %out
+ store i16 %0, i16 addrspace(5)* %out
ret void
}
@@ -689,14 +689,14 @@ entry:
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @vecload2(i32 addrspace(0)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
+define amdgpu_kernel void @vecload2(i32 addrspace(5)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
%0 = load i32, i32 addrspace(2)* %mem, align 4
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
%1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
- store i32 %0, i32 addrspace(0)* %out, align 4
- %arrayidx1 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 1
- store i32 %1, i32 addrspace(0)* %arrayidx1, align 4
+ store i32 %0, i32 addrspace(5)* %out, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(5)* %out, i64 1
+ store i32 %1, i32 addrspace(5)* %arrayidx1, align 4
ret void
}
@@ -727,15 +727,15 @@ entry:
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @i128-const-store(i32 addrspace(0)* %out) {
+define amdgpu_kernel void @i128-const-store(i32 addrspace(5)* %out) {
entry:
- store i32 1, i32 addrspace(0)* %out, align 4
- %arrayidx2 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 1
- store i32 1, i32 addrspace(0)* %arrayidx2, align 4
- %arrayidx4 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 2
- store i32 2, i32 addrspace(0)* %arrayidx4, align 4
- %arrayidx6 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 3
- store i32 2, i32 addrspace(0)* %arrayidx6, align 4
+ store i32 1, i32 addrspace(5)* %out, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(5)* %out, i64 1
+ store i32 1, i32 addrspace(5)* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i32, i32 addrspace(5)* %out, i64 2
+ store i32 2, i32 addrspace(5)* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds i32, i32 addrspace(5)* %out, i64 3
+ store i32 2, i32 addrspace(5)* %arrayidx6, align 4
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-vector-ptrs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-vector-ptrs.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-vector-ptrs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-vector-ptrs.ll Fri Feb 2 08:07:16 2018
@@ -5,8 +5,8 @@
; AMDGPUDAGToDAGISel::SelectMUBUFScratch() which is used for selecting
; scratch loads and stores.
; CHECK-LABEL: {{^}}store_vector_ptrs:
-define amdgpu_kernel void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
- %p = getelementptr [1024 x i32], <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
- store <4 x i32*> %p, <4 x i32*>* %out
+define amdgpu_kernel void @store_vector_ptrs(<4 x i32 addrspace(5)*> addrspace(5)* %out, <4 x [1024 x i32] addrspace(5)*> %array) nounwind {
+ %p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(5)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+ store <4 x i32 addrspace(5)*> %p, <4 x i32 addrspace(5)*> addrspace(5)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll Fri Feb 2 08:07:16 2018
@@ -1,19 +1,19 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-before=si-debugger-insert-nops < %s | FileCheck --check-prefix=GCN %s
; GCN-LABEL: name: syncscopes
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out, addrspace 4)
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out, addrspace 4)
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out, addrspace 4)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
define void @syncscopes(
i32 %agent,
- i32 addrspace(4)* %agent_out,
+ i32* %agent_out,
i32 %workgroup,
- i32 addrspace(4)* %workgroup_out,
+ i32* %workgroup_out,
i32 %wavefront,
- i32 addrspace(4)* %wavefront_out) {
+ i32* %wavefront_out) {
entry:
- store atomic i32 %agent, i32 addrspace(4)* %agent_out syncscope("agent") seq_cst, align 4
- store atomic i32 %workgroup, i32 addrspace(4)* %workgroup_out syncscope("workgroup") seq_cst, align 4
- store atomic i32 %wavefront, i32 addrspace(4)* %wavefront_out syncscope("wavefront") seq_cst, align 4
+ store atomic i32 %agent, i32* %agent_out syncscope("agent") seq_cst, align 4
+ store atomic i32 %workgroup, i32* %workgroup_out syncscope("workgroup") seq_cst, align 4
+ store atomic i32 %wavefront, i32* %wavefront_out syncscope("wavefront") seq_cst, align 4
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll Fri Feb 2 08:07:16 2018
@@ -81,10 +81,10 @@ define amdgpu_kernel void @target_fiji()
; CHECK: ; LDSByteSize: 5120
define amdgpu_kernel void @promote_alloca_enabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #5 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
- %load = load i32, i32* %arrayidx1
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp
+ %load = load i32, i32 addrspace(5)* %arrayidx1
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -95,10 +95,10 @@ entry:
; CHECK: ScratchSize: 24
define amdgpu_kernel void @promote_alloca_disabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #6 {
entry:
- %stack = alloca [5 x i32], align 4
+ %stack = alloca [5 x i32], align 4, addrspace(5)
%tmp = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
- %load = load i32, i32* %arrayidx1
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp
+ %load = load i32, i32 addrspace(5)* %arrayidx1
store i32 %load, i32 addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir Fri Feb 2 08:07:16 2018
@@ -48,7 +48,7 @@ body: |
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
$vgpr1 = V_CNDMASK_B32_e64 0, -1, killed $sgpr0_sgpr1, implicit $exec
$sgpr0_sgpr1 = COPY $exec, implicit-def $exec
- SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4)
+ SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
@@ -66,7 +66,7 @@ body: |
bb.2:
successors:
- $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4)
+ $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4, addrspace 5)
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
bb.3:
@@ -82,7 +82,7 @@ body: |
# CHECK-LABEL: {{^}}name: undefined_physreg_sgpr_spill_reorder
# CHECK: $sgpr0_sgpr1 = COPY $exec, implicit-def $exec
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
-# CHECK: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4)
+# CHECK: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
# CHECK: $exec = COPY killed $sgpr2_sgpr3
name: undefined_physreg_sgpr_spill_reorder
alignment: 0
@@ -112,7 +112,7 @@ body: |
$vgpr1 = V_CNDMASK_B32_e64 0, -1, killed $sgpr0_sgpr1, implicit $exec
$sgpr0_sgpr1 = COPY $exec, implicit-def $exec
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
- SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4)
+ SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_BRANCH %bb.1
@@ -129,7 +129,7 @@ body: |
bb.2:
successors:
- $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4)
+ $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4, addrspace 5)
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
bb.3:
Modified: llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir Fri Feb 2 08:07:16 2018
@@ -2,32 +2,32 @@
--- |
define amdgpu_kernel void @fold_fi_vgpr() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
define amdgpu_kernel void @fold_vgpr_fi() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
define amdgpu_kernel void @fold_sgpr_fi() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
define amdgpu_kernel void @fold_fi_sgpr() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
define amdgpu_kernel void @fold_fi_imm() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
define amdgpu_kernel void @fold_imm_fi() {
- %alloca = alloca [4 x i32]
+ %alloca = alloca [4 x i32], addrspace(5)
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-flat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-flat.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-flat.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-flat.ll Fri Feb 2 08:07:16 2018
@@ -10,13 +10,13 @@
; XGCN: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[DATA:v[0-9]+]]
; XGCN: s_waitcnt vmcnt(0) lgkmcnt(0)
; XGCN: flat_load_dword [[DATA]], v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @test(i32 addrspace(4)* %out, i32 %in) {
- store volatile i32 0, i32 addrspace(4)* %out
- %val = load volatile i32, i32 addrspace(4)* %out
+define amdgpu_kernel void @test(i32* %out, i32 %in) {
+ store volatile i32 0, i32* %out
+ %val = load volatile i32, i32* %out
ret void
}
-; Make sure lgkmcnt isn't used for global_* instructions
+; Make sure lgkmcnt isn't used for global_* instructions
; GCN-LABEL: {{^}}test_waitcnt_type_flat_global:
; GFX9: global_load_dword [[LD:v[0-9]+]]
; GFX9-NEXT: s_waitcnt vmcnt(0){{$}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-looptest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-looptest.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-looptest.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-looptest.ll Fri Feb 2 08:07:16 2018
@@ -17,8 +17,8 @@
define amdgpu_kernel void @testKernel(i32 addrspace(1)* nocapture %arg) local_unnamed_addr #0 {
bb:
- store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> addrspace(4)* bitcast (float addrspace(4)* getelementptr ([100 x float], [100 x float] addrspace(4)* addrspacecast ([100 x float] addrspace(1)* @data_generic to [100 x float] addrspace(4)*), i64 0, i64 4) to <2 x float> addrspace(4)*), align 4
- store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> addrspace(4)* bitcast (float addrspace(4)* getelementptr ([100 x float], [100 x float] addrspace(4)* addrspacecast ([100 x float] addrspace(1)* @data_reference to [100 x float] addrspace(4)*), i64 0, i64 4) to <2 x float> addrspace(4)*), align 4
+ store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float>* bitcast (float* getelementptr ([100 x float], [100 x float]* addrspacecast ([100 x float] addrspace(1)* @data_generic to [100 x float]*), i64 0, i64 4) to <2 x float>*), align 4
+ store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float>* bitcast (float* getelementptr ([100 x float], [100 x float]* addrspacecast ([100 x float] addrspace(1)* @data_reference to [100 x float]*), i64 0, i64 4) to <2 x float>*), align 4
br label %bb18
bb1: ; preds = %bb18
Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir Fri Feb 2 08:07:16 2018
@@ -3,8 +3,8 @@
--- |
define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
<4 x i32> addrspace(1)* %global16,
- i32 addrspace(4)* %flat4,
- <4 x i32> addrspace(4)* %flat16) {
+ i32* %flat4,
+ <4 x i32>* %flat16) {
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll Fri Feb 2 08:07:16 2018
@@ -657,17 +657,17 @@ break:
; CHECK: buffer_store_dwordx4
define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind {
entry:
- %array = alloca [32 x i32], align 4
+ %array = alloca [32 x i32], align 4, addrspace(5)
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
- %s.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 0
- store volatile i32 %a, i32* %s.gep, align 4
+ %s.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 0
+ store volatile i32 %a, i32 addrspace(5)* %s.gep, align 4
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0)
- %c.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 %idx
- %c = load i32, i32* %c.gep, align 4
+ %c.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 %idx
+ %c = load i32, i32 addrspace(5)* %c.gep, align 4
%c.bc = bitcast i32 %c to float
%t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
Modified: llvm/trunk/test/DebugInfo/AMDGPU/code-pointer-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/AMDGPU/code-pointer-size.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/AMDGPU/code-pointer-size.ll (original)
+++ llvm/trunk/test/DebugInfo/AMDGPU/code-pointer-size.ll Fri Feb 2 08:07:16 2018
@@ -4,11 +4,11 @@
;
; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
;
-; kernel void kernel1(global int *A) {
+; kernel void kernel1(global int *A) {
; *A = 11;
; }
;
-; kernel void kernel2(global int *B) {
+; kernel void kernel2(global int *B) {
; *B = 12;
; }
@@ -20,20 +20,20 @@ declare void @llvm.dbg.declare(metadata,
define amdgpu_kernel void @kernel1(i32 addrspace(1)* %A) !dbg !7 {
entry:
- %A.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !16, metadata !17), !dbg !18
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !19
+ %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !16, metadata !17), !dbg !18
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !19
store i32 11, i32 addrspace(1)* %0, align 4, !dbg !20
ret void, !dbg !21
}
define amdgpu_kernel void @kernel2(i32 addrspace(1)* %B) !dbg !22 {
entry:
- %B.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %B, i32 addrspace(1)** %B.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %B.addr, metadata !23, metadata !17), !dbg !24
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %B.addr, align 4, !dbg !25
+ %B.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %B, i32 addrspace(1)* addrspace(5)* %B.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %B.addr, metadata !23, metadata !17), !dbg !24
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %B.addr, align 4, !dbg !25
store i32 12, i32 addrspace(1)* %0, align 4, !dbg !26
ret void, !dbg !27
}
@@ -57,7 +57,7 @@ entry:
!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!12 = !{i32 1}
!13 = !{!"none"}
-!14 = !{!"int*"}
+!14 = !{!"int addrspace(5)*"}
!15 = !{!""}
!16 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
!17 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
Modified: llvm/trunk/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll (original)
+++ llvm/trunk/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll Fri Feb 2 08:07:16 2018
@@ -4,11 +4,11 @@
;
; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
;
-; kernel void kernel1(global int *A) {
+; kernel void kernel1(global int *A) {
; *A = 11;
; }
;
-; kernel void kernel2(global int *B) {
+; kernel void kernel2(global int *B) {
; *B = 12;
; }
@@ -19,20 +19,20 @@ declare void @llvm.dbg.declare(metadata,
define amdgpu_kernel void @kernel1(i32 addrspace(1)* %A) !dbg !7 {
entry:
- %A.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !16, metadata !17), !dbg !18
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !19
+ %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !16, metadata !17), !dbg !18
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4, !dbg !19
store i32 11, i32 addrspace(1)* %0, align 4, !dbg !20
ret void, !dbg !21
}
define amdgpu_kernel void @kernel2(i32 addrspace(1)* %B) !dbg !22 {
entry:
- %B.addr = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %B, i32 addrspace(1)** %B.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %B.addr, metadata !23, metadata !17), !dbg !24
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %B.addr, align 4, !dbg !25
+ %B.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %B, i32 addrspace(1)* addrspace(5)* %B.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %B.addr, metadata !23, metadata !17), !dbg !24
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %B.addr, align 4, !dbg !25
store i32 12, i32 addrspace(1)* %0, align 4, !dbg !26
ret void, !dbg !27
}
@@ -56,7 +56,7 @@ entry:
!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!12 = !{i32 1}
!13 = !{!"none"}
-!14 = !{!"int*"}
+!14 = !{!"int addrspace(5)*"}
!15 = !{!""}
!16 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
!17 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
Modified: llvm/trunk/test/DebugInfo/AMDGPU/pointer-address-space.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/AMDGPU/pointer-address-space.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/AMDGPU/pointer-address-space.ll (original)
+++ llvm/trunk/test/DebugInfo/AMDGPU/pointer-address-space.ll Fri Feb 2 08:07:16 2018
@@ -5,11 +5,11 @@
; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
;
; kernel void kernel1() {
-; global int *FuncVar0 = 0;
-; constant int *FuncVar1 = 0;
-; local int *FuncVar2 = 0;
-; private int *FuncVar3 = 0;
-; int *FuncVar4 = 0;
+; global int *FuncVar0 = 0;
+; constant int *FuncVar1 = 0;
+; local int *FuncVar2 = 0;
+; private int *FuncVar3 = 0;
+; int *FuncVar4 = 0;
; }
; CHECK: DW_AT_name {{.*}}"FuncVar0"
@@ -53,21 +53,21 @@ declare void @llvm.dbg.declare(metadata,
define amdgpu_kernel void @kernel1() !dbg !7 {
entry:
- %FuncVar0 = alloca i32 addrspace(1)*, align 4
- %FuncVar1 = alloca i32 addrspace(2)*, align 4
- %FuncVar2 = alloca i32 addrspace(3)*, align 4
- %FuncVar3 = alloca i32*, align 4
- %FuncVar4 = alloca i32 addrspace(4)*, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %FuncVar0, metadata !10, metadata !13), !dbg !14
- store i32 addrspace(1)* null, i32 addrspace(1)** %FuncVar0, align 4, !dbg !14
- call void @llvm.dbg.declare(metadata i32 addrspace(2)** %FuncVar1, metadata !15, metadata !13), !dbg !16
- store i32 addrspace(2)* null, i32 addrspace(2)** %FuncVar1, align 4, !dbg !16
- call void @llvm.dbg.declare(metadata i32 addrspace(3)** %FuncVar2, metadata !17, metadata !13), !dbg !19
- store i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)** %FuncVar2, align 4, !dbg !19
- call void @llvm.dbg.declare(metadata i32** %FuncVar3, metadata !20, metadata !13), !dbg !22
- store i32* addrspacecast (i32 addrspace(4)* null to i32*), i32** %FuncVar3, align 4, !dbg !22
- call void @llvm.dbg.declare(metadata i32 addrspace(4)** %FuncVar4, metadata !23, metadata !13), !dbg !24
- store i32 addrspace(4)* null, i32 addrspace(4)** %FuncVar4, align 4, !dbg !24
+ %FuncVar0 = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ %FuncVar1 = alloca i32 addrspace(2)*, align 4, addrspace(5)
+ %FuncVar2 = alloca i32 addrspace(3)*, align 4, addrspace(5)
+ %FuncVar3 = alloca i32 addrspace(5)*, align 4, addrspace(5)
+ %FuncVar4 = alloca i32*, align 4, addrspace(5)
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %FuncVar0, metadata !10, metadata !13), !dbg !14
+ store i32 addrspace(1)* null, i32 addrspace(1)* addrspace(5)* %FuncVar0, align 4, !dbg !14
+ call void @llvm.dbg.declare(metadata i32 addrspace(2)* addrspace(5)* %FuncVar1, metadata !15, metadata !13), !dbg !16
+ store i32 addrspace(2)* null, i32 addrspace(2)* addrspace(5)* %FuncVar1, align 4, !dbg !16
+ call void @llvm.dbg.declare(metadata i32 addrspace(3)* addrspace(5)* %FuncVar2, metadata !17, metadata !13), !dbg !19
+ store i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*), i32 addrspace(3)* addrspace(5)* %FuncVar2, align 4, !dbg !19
+ call void @llvm.dbg.declare(metadata i32 addrspace(5)* addrspace(5)* %FuncVar3, metadata !20, metadata !13), !dbg !22
+ store i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %FuncVar3, align 4, !dbg !22
+ call void @llvm.dbg.declare(metadata i32* addrspace(5)* %FuncVar4, metadata !23, metadata !13), !dbg !24
+ store i32* null, i32* addrspace(5)* %FuncVar4, align 4, !dbg !24
ret void, !dbg !25
}
Modified: llvm/trunk/test/DebugInfo/AMDGPU/variable-locations.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/AMDGPU/variable-locations.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/AMDGPU/variable-locations.ll (original)
+++ llvm/trunk/test/DebugInfo/AMDGPU/variable-locations.ll Fri Feb 2 08:07:16 2018
@@ -7,7 +7,7 @@
; global int GlobA;
; global int GlobB;
;
-; kernel void kernel1(unsigned int ArgN, global int *ArgA, global int *ArgB) {
+; kernel void kernel1(unsigned int ArgN, global int *ArgA, global int *ArgB) {
; ArgA[ArgN] += ArgB[ArgN];
; }
@@ -45,22 +45,22 @@ define amdgpu_kernel void @kernel1(
; CHECK-NEXT: DW_AT_name {{.*}}"ArgB"
i32 addrspace(1)* %ArgB) !dbg !13 {
entry:
- %ArgN.addr = alloca i32, align 4
- %ArgA.addr = alloca i32 addrspace(1)*, align 4
- %ArgB.addr = alloca i32 addrspace(1)*, align 4
- store i32 %ArgN, i32* %ArgN.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %ArgN.addr, metadata !22, metadata !23), !dbg !24
- store i32 addrspace(1)* %ArgA, i32 addrspace(1)** %ArgA.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgA.addr, metadata !25, metadata !23), !dbg !26
- store i32 addrspace(1)* %ArgB, i32 addrspace(1)** %ArgB.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgB.addr, metadata !27, metadata !23), !dbg !28
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgB.addr, align 4, !dbg !29
- %1 = load i32, i32* %ArgN.addr, align 4, !dbg !30
+ %ArgN.addr = alloca i32, align 4, addrspace(5)
+ %ArgA.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ %ArgB.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 %ArgN, i32 addrspace(5)* %ArgN.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(5)* %ArgN.addr, metadata !22, metadata !23), !dbg !24
+ store i32 addrspace(1)* %ArgA, i32 addrspace(1)* addrspace(5)* %ArgA.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %ArgA.addr, metadata !25, metadata !23), !dbg !26
+ store i32 addrspace(1)* %ArgB, i32 addrspace(1)* addrspace(5)* %ArgB.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %ArgB.addr, metadata !27, metadata !23), !dbg !28
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %ArgB.addr, align 4, !dbg !29
+ %1 = load i32, i32 addrspace(5)* %ArgN.addr, align 4, !dbg !30
%idxprom = zext i32 %1 to i64, !dbg !29
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 %idxprom, !dbg !29
%2 = load i32, i32 addrspace(1)* %arrayidx, align 4, !dbg !29
- %3 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgA.addr, align 4, !dbg !31
- %4 = load i32, i32* %ArgN.addr, align 4, !dbg !32
+ %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %ArgA.addr, align 4, !dbg !31
+ %4 = load i32, i32 addrspace(5)* %ArgN.addr, align 4, !dbg !32
%idxprom1 = zext i32 %4 to i64, !dbg !31
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %3, i64 %idxprom1, !dbg !31
%5 = load i32, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33
@@ -94,7 +94,7 @@ entry:
!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64)
!18 = !{i32 0, i32 1, i32 1}
!19 = !{!"none", !"none", !"none"}
-!20 = !{!"uint", !"int*", !"int*"}
+!20 = !{!"uint", !"int addrspace(5)*", !"int addrspace(5)*"}
!21 = !{!"", !"", !""}
!22 = !DILocalVariable(name: "ArgN", arg: 1, scope: !13, file: !3, line: 4, type: !16)
!23 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
Modified: llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll (original)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll Fri Feb 2 08:07:16 2018
@@ -5,7 +5,7 @@
; CHECK: br
; CHECK-NOT: addrspacecast
define i64 @no_sink_local_to_flat(i1 %pred, i64 addrspace(3)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(3)* %ptr to i64 addrspace(4)*
+ %ptr_cast = addrspacecast i64 addrspace(3)* %ptr to i64*
br i1 %pred, label %l1, label %l2
l1:
@@ -13,7 +13,7 @@ l1:
ret i64 %v1
l2:
- %v2 = load i64, i64 addrspace(4)* %ptr_cast
+ %v2 = load i64, i64* %ptr_cast
ret i64 %v2
}
@@ -21,16 +21,16 @@ l2:
; CHECK: addrspacecast
; CHECK: br
; CHECK-NOT: addrspacecast
-define i64 @no_sink_private_to_flat(i1 %pred, i64* %ptr) {
- %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(4)*
+define i64 @no_sink_private_to_flat(i1 %pred, i64 addrspace(5)* %ptr) {
+ %ptr_cast = addrspacecast i64 addrspace(5)* %ptr to i64*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64* %ptr
+ %v1 = load i64, i64 addrspace(5)* %ptr
ret i64 %v1
l2:
- %v2 = load i64, i64 addrspace(4)* %ptr_cast
+ %v2 = load i64, i64* %ptr_cast
ret i64 %v2
}
@@ -40,7 +40,7 @@ l2:
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_global_to_flat(i1 %pred, i64 addrspace(1)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64 addrspace(4)*
+ %ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64*
br i1 %pred, label %l1, label %l2
l1:
@@ -48,7 +48,7 @@ l1:
ret i64 %v1
l2:
- %v2 = load i64, i64 addrspace(4)* %ptr_cast
+ %v2 = load i64, i64* %ptr_cast
ret i64 %v2
}
@@ -56,12 +56,12 @@ l2:
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_global(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(1)*
+define i64 @sink_flat_to_global(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(1)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
@@ -73,12 +73,12 @@ l2:
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_constant(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(2)*
+define i64 @sink_flat_to_constant(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(2)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
@@ -90,12 +90,12 @@ l2:
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_local(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(3)*
+define i64 @sink_flat_to_local(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(3)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
@@ -107,15 +107,15 @@ l2:
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_private(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64*
+define i64 @sink_flat_to_private(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(5)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
- %v2 = load i64, i64* %ptr_cast
+ %v2 = load i64, i64 addrspace(5)* %ptr_cast
ret i64 %v2
}
Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll Fri Feb 2 08:07:16 2018
@@ -3,69 +3,69 @@
; Trivial optimization of generic addressing
; CHECK-LABEL: @load_global_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(1)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_global_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+define float @load_global_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
%tmp1 = load float, float addrspace(1)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @load_constant_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(2)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(2)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(2)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_constant_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(2)*
+define float @load_constant_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(2)*
%tmp1 = load float, float addrspace(2)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @load_group_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(3)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_group_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+define float @load_group_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
%tmp1 = load float, float addrspace(3)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @load_private_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
-; CHECK-NEXT: %tmp1 = load float, float* %tmp0
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+; CHECK-NEXT: %tmp1 = load float, float addrspace(5)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_private_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
- %tmp1 = load float, float* %tmp0
+define float @load_private_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+ %tmp1 = load float, float addrspace(5)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @store_global_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* %tmp0
-define amdgpu_kernel void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+define amdgpu_kernel void @store_global_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
store float 0.0, float addrspace(1)* %tmp0
ret void
}
; CHECK-LABEL: @store_group_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(3)* %tmp0
-define amdgpu_kernel void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+define amdgpu_kernel void @store_group_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
store float 0.0, float addrspace(3)* %tmp0
ret void
}
; CHECK-LABEL: @store_private_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
-; CHECK-NEXT: store float 0.000000e+00, float* %tmp0
-define amdgpu_kernel void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
- store float 0.0, float* %tmp0
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+; CHECK-NEXT: store float 0.000000e+00, float addrspace(5)* %tmp0
+define amdgpu_kernel void @store_private_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+ store float 0.0, float addrspace(5)* %tmp0
ret void
}
@@ -75,10 +75,10 @@ define amdgpu_kernel void @store_private
; CHECK-NEXT: store i32 %val, i32 addrspace(1)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
@@ -88,95 +88,95 @@ define amdgpu_kernel void @load_store_gl
; CHECK-NEXT: store i32 %val, i32 addrspace(3)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; Optimized to private load/store.
; CHECK-LABEL: @load_store_private(
-; CHECK-NEXT: %val = load i32, i32* %input, align 4
-; CHECK-NEXT: store i32 %val, i32* %output, align 4
+; CHECK-NEXT: %val = load i32, i32 addrspace(5)* %input, align 4
+; CHECK-NEXT: store i32 %val, i32 addrspace(5)* %output, align 4
; CHECK-NEXT: ret void
-define amdgpu_kernel void @load_store_private(i32* nocapture %input, i32* nocapture %output) #0 {
- %tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+define amdgpu_kernel void @load_store_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
+ %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; No optimization. flat load/store.
; CHECK-LABEL: @load_store_flat(
-; CHECK-NEXT: %val = load i32, i32 addrspace(4)* %input, align 4
-; CHECK-NEXT: store i32 %val, i32 addrspace(4)* %output, align 4
+; CHECK-NEXT: %val = load i32, i32* %input, align 4
+; CHECK-NEXT: store i32 %val, i32* %output, align 4
; CHECK-NEXT: ret void
-define amdgpu_kernel void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4)* nocapture %output) #0 {
- %val = load i32, i32 addrspace(4)* %input, align 4
- store i32 %val, i32 addrspace(4)* %output, align 4
+define amdgpu_kernel void @load_store_flat(i32* nocapture %input, i32* nocapture %output) #0 {
+ %val = load i32, i32* %input, align 4
+ store i32 %val, i32* %output, align 4
ret void
}
; CHECK-LABEL: @store_addrspacecast_ptr_value(
-; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
-; CHECK-NEXT: store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4
-define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32 addrspace(4)* addrspace(1)* nocapture %output) #0 {
- %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4
+; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32*
+; CHECK-NEXT: store i32* %cast, i32* addrspace(1)* %output, align 4
+define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32* addrspace(1)* nocapture %output) #0 {
+ %cast = addrspacecast i32 addrspace(1)* %input to i32*
+ store i32* %cast, i32* addrspace(1)* %output, align 4
ret void
}
; CHECK-LABEL: @atomicrmw_add_global_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(1)* %global.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = atomicrmw add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @atomicrmw_add_group_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(3)* %group.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = atomicrmw add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @cmpxchg_global_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; CHECK-LABEL: @cmpxchg_group_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; Not pointer operand
; CHECK-LABEL: @cmpxchg_group_to_flat_wrong_operand(
-; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)*
-; CHECK: %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic
-define { i32 addrspace(4)*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32 addrspace(4)* %val) #0 {
- %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)*
- %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic
- ret { i32 addrspace(4)*, i1 } %ret
+; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
+; CHECK: %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
+define { i32*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32* %val) #0 {
+ %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
+ %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
+ ret { i32*, i1 } %ret
}
; Null pointer in local addr space
; CHECK-LABEL: @local_nullptr
-; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*)
+; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
; CHECK-NOT: i8 addrspace(3)* null
define void @local_nullptr(i32 addrspace(1)* nocapture %results, i8 addrspace(3)* %a) {
entry:
- %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*)
+ %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
%conv = zext i1 %tobool to i32
store i32 %conv, i32 addrspace(1)* %results, align 4
ret void
Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll Fri Feb 2 08:07:16 2018
@@ -3,57 +3,57 @@
; CHECK-LABEL: @icmp_flat_cmp_self(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, %group.ptr.0
define i1 @icmp_flat_cmp_self(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, %cast0
ret i1 %cmp
}
; CHECK-LABEL: @icmp_flat_flat_from_group(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, %group.ptr.1
define i1 @icmp_flat_flat_from_group(i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %cast1
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %cmp = icmp eq i32* %cast0, %cast1
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_from_group_private(
-; CHECK: %1 = addrspacecast i32* %private.ptr.0 to i32 addrspace(4)*
-; CHECK: %2 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, %2
-define i1 @icmp_mismatch_flat_from_group_private(i32* %private.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32* %private.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %cast1
+; CHECK: %1 = addrspacecast i32 addrspace(5)* %private.ptr.0 to i32*
+; CHECK: %2 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+; CHECK: %cmp = icmp eq i32* %1, %2
+define i1 @icmp_mismatch_flat_from_group_private(i32 addrspace(5)* %private.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(5)* %private.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %cmp = icmp eq i32* %cast0, %cast1
ret i1 %cmp
}
; CHECK-LABEL: @icmp_flat_group_flat(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, %flat.ptr.1
-define i1 @icmp_flat_group_flat(i32 addrspace(3)* %group.ptr.0, i32 addrspace(4)* %flat.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %flat.ptr.1
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %1, %flat.ptr.1
+define i1 @icmp_flat_group_flat(i32 addrspace(3)* %group.ptr.0, i32* %flat.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, %flat.ptr.1
ret i1 %cmp
}
; CHECK-LABEL: @icmp_flat_flat_group(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %flat.ptr.0, %1
-define i1 @icmp_flat_flat_group(i32 addrspace(4)* %flat.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %flat.ptr.0, %cast1
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+; CHECK: %cmp = icmp eq i32* %flat.ptr.0, %1
+define i1 @icmp_flat_flat_group(i32* %flat.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %cmp = icmp eq i32* %flat.ptr.0, %cast1
ret i1 %cmp
}
; Keeping as cmp addrspace(3)* is better
; CHECK-LABEL: @icmp_flat_to_group_cmp(
-; CHECK: %cast0 = addrspacecast i32 addrspace(4)* %flat.ptr.0 to i32 addrspace(3)*
-; CHECK: %cast1 = addrspacecast i32 addrspace(4)* %flat.ptr.1 to i32 addrspace(3)*
+; CHECK: %cast0 = addrspacecast i32* %flat.ptr.0 to i32 addrspace(3)*
+; CHECK: %cast1 = addrspacecast i32* %flat.ptr.1 to i32 addrspace(3)*
; CHECK: %cmp = icmp eq i32 addrspace(3)* %cast0, %cast1
-define i1 @icmp_flat_to_group_cmp(i32 addrspace(4)* %flat.ptr.0, i32 addrspace(4)* %flat.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(4)* %flat.ptr.0 to i32 addrspace(3)*
- %cast1 = addrspacecast i32 addrspace(4)* %flat.ptr.1 to i32 addrspace(3)*
+define i1 @icmp_flat_to_group_cmp(i32* %flat.ptr.0, i32* %flat.ptr.1) #0 {
+ %cast0 = addrspacecast i32* %flat.ptr.0 to i32 addrspace(3)*
+ %cast1 = addrspacecast i32* %flat.ptr.1 to i32 addrspace(3)*
%cmp = icmp eq i32 addrspace(3)* %cast0, %cast1
ret i1 %cmp
}
@@ -62,35 +62,35 @@ define i1 @icmp_flat_to_group_cmp(i32 ad
; constant cast if this is OK to change if 0 is a valid pointer.
; CHECK-LABEL: @icmp_group_flat_cmp_null(
-; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32* null to i32 addrspace(3)*)
define i1 @icmp_group_flat_cmp_null(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, null
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, null
ret i1 %cmp
}
; CHECK-LABEL: @icmp_group_flat_cmp_constant_inttoptr(
-; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32 addrspace(4)* inttoptr (i64 400 to i32 addrspace(4)*) to i32 addrspace(3)*)
+; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32* inttoptr (i64 400 to i32*) to i32 addrspace(3)*)
define i1 @icmp_group_flat_cmp_constant_inttoptr(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, inttoptr (i64 400 to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, inttoptr (i64 400 to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_null(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, addrspacecast (i32* null to i32 addrspace(4)*)
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %1, addrspacecast (i32 addrspace(5)* null to i32*)
define i1 @icmp_mismatch_flat_group_private_cmp_null(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32* null to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(5)* null to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_undef(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, undef
define i1 @icmp_mismatch_flat_group_private_cmp_undef(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32* undef to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(5)* undef to i32*)
ret i1 %cmp
}
@@ -98,62 +98,62 @@ define i1 @icmp_mismatch_flat_group_priv
@global0 = internal addrspace(1) global i32 0, align 4
; CHECK-LABEL: @icmp_mismatch_flat_group_global_cmp_gv(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %1, addrspacecast (i32 addrspace(1)* @global0 to i32*)
define i1 @icmp_mismatch_flat_group_global_cmp_gv(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(1)* @global0 to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_group_global_cmp_gv_gv(
-; CHECK: %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+; CHECK: %cmp = icmp eq i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), addrspacecast (i32 addrspace(1)* @global0 to i32*)
define i1 @icmp_mismatch_group_global_cmp_gv_gv(i32 addrspace(3)* %group.ptr.0) #0 {
- %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+ %cmp = icmp eq i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), addrspacecast (i32 addrspace(1)* @global0 to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_group_flat_cmp_undef(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, undef
define i1 @icmp_group_flat_cmp_undef(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, undef
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, undef
ret i1 %cmp
}
; Test non-canonical orders
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_null_swap(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*), %1
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* null to i32*), %1
define i1 @icmp_mismatch_flat_group_private_cmp_null_swap(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*), %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* null to i32*), %cast0
ret i1 %cmp
}
; CHECK-LABEL: @icmp_group_flat_cmp_undef_swap(
; CHECK: %cmp = icmp eq i32 addrspace(3)* undef, %group.ptr.0
define i1 @icmp_group_flat_cmp_undef_swap(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* undef, %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* undef, %cast0
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_undef_swap(
; CHECK: %cmp = icmp eq i32 addrspace(3)* undef, %group.ptr.0
define i1 @icmp_mismatch_flat_group_private_cmp_undef_swap(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32* undef to i32 addrspace(4)*), %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* undef to i32*), %cast0
ret i1 %cmp
}
; TODO: Should be handled
; CHECK-LABEL: @icmp_flat_flat_from_group_vector(
-; CHECK: %cmp = icmp eq <2 x i32 addrspace(4)*> %cast0, %cast1
+; CHECK: %cmp = icmp eq <2 x i32*> %cast0, %cast1
define <2 x i1> @icmp_flat_flat_from_group_vector(<2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 {
- %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*>
- %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*>
- %cmp = icmp eq <2 x i32 addrspace(4)*> %cast0, %cast1
+ %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
+ %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
+ %cmp = icmp eq <2 x i32*> %cast0, %cast1
ret <2 x i1> %cmp
}
Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll Fri Feb 2 08:07:16 2018
@@ -30,29 +30,29 @@
; CHECK: ret void
define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 {
bb:
- %tmp = load float, float addrspace(4)* addrspacecast (float addrspace(3)* @scalar to float addrspace(4)*), align 4
+ %tmp = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
call void @use(float %tmp)
- store float %v, float addrspace(4)* addrspacecast (float addrspace(3)* @scalar to float addrspace(4)*), align 4
+ store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
call void @llvm.amdgcn.s.barrier()
- %tmp1 = addrspacecast float addrspace(3)* @scalar to float addrspace(4)*
- %tmp2 = load float, float addrspace(4)* %tmp1, align 4
+ %tmp1 = addrspacecast float addrspace(3)* @scalar to float*
+ %tmp2 = load float, float* %tmp1, align 4
call void @use(float %tmp2)
- store float %v, float addrspace(4)* %tmp1, align 4
+ store float %v, float* %tmp1, align 4
call void @llvm.amdgcn.s.barrier()
- %tmp3 = load float, float addrspace(4)* getelementptr inbounds ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5), align 4
+ %tmp3 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
call void @use(float %tmp3)
- store float %v, float addrspace(4)* getelementptr inbounds ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5), align 4
+ store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
call void @llvm.amdgcn.s.barrier()
- %tmp4 = getelementptr inbounds [10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5
- %tmp5 = load float, float addrspace(4)* %tmp4, align 4
+ %tmp4 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
+ %tmp5 = load float, float* %tmp4, align 4
call void @use(float %tmp5)
- store float %v, float addrspace(4)* %tmp4, align 4
+ store float %v, float* %tmp4, align 4
call void @llvm.amdgcn.s.barrier()
- %tmp6 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*
- %tmp7 = getelementptr inbounds [10 x float], [10 x float] addrspace(4)* %tmp6, i32 0, i32 %i
- %tmp8 = load float, float addrspace(4)* %tmp7, align 4
+ %tmp6 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
+ %tmp7 = getelementptr inbounds [10 x float], [10 x float]* %tmp6, i32 0, i32 %i
+ %tmp8 = load float, float* %tmp7, align 4
call void @use(float %tmp8)
- store float %v, float addrspace(4)* %tmp7, align 4
+ store float %v, float* %tmp7, align 4
call void @llvm.amdgcn.s.barrier()
ret void
}
@@ -61,7 +61,7 @@ bb:
; CHECK: %tmp = load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*), align 4
define i32 @constexpr_load_int_from_float_lds() #0 {
bb:
- %tmp = load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) to i32 addrspace(4)*), align 4
+ %tmp = load i32, i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) to i32*), align 4
ret i32 %tmp
}
@@ -73,18 +73,18 @@ bb:
; CHECK: ret i32 %tmp4
define i32 @load_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) #0 {
bb:
- %tmp = addrspacecast float addrspace(1)* %input to float addrspace(4)*
- %tmp1 = getelementptr float, float addrspace(4)* %tmp, i32 %i
- %tmp2 = getelementptr float, float addrspace(4)* %tmp1, i32 %j
- %tmp3 = bitcast float addrspace(4)* %tmp2 to i32 addrspace(4)*
- %tmp4 = load i32, i32 addrspace(4)* %tmp3
+ %tmp = addrspacecast float addrspace(1)* %input to float*
+ %tmp1 = getelementptr float, float* %tmp, i32 %i
+ %tmp2 = getelementptr float, float* %tmp1, i32 %j
+ %tmp3 = bitcast float* %tmp2 to i32*
+ %tmp4 = load i32, i32* %tmp3
ret i32 %tmp4
}
; CHECK-LABEL: @nested_const_expr(
; CHECK: store i32 1, i32 addrspace(3)* bitcast (float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i64 0, i64 1) to i32 addrspace(3)*), align 4
define amdgpu_kernel void @nested_const_expr() #0 {
- store i32 1, i32 addrspace(4)* bitcast (float addrspace(4)* getelementptr ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i64 0, i64 1) to i32 addrspace(4)*), align 4
+ store i32 1, i32* bitcast (float* getelementptr ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i64 0, i64 1) to i32*), align 4
ret void
}
@@ -95,10 +95,10 @@ define amdgpu_kernel void @nested_const_
; CHECK-NEXT: ret void
define amdgpu_kernel void @rauw(float addrspace(1)* %input) #0 {
bb:
- %generic_input = addrspacecast float addrspace(1)* %input to float addrspace(4)*
- %addr = getelementptr float, float addrspace(4)* %generic_input, i64 10
- %v = load float, float addrspace(4)* %addr
- store float %v, float addrspace(4)* %addr
+ %generic_input = addrspacecast float addrspace(1)* %input to float*
+ %addr = getelementptr float, float* %generic_input, i64 10
+ %v = load float, float* %addr
+ store float %v, float* %addr
ret void
}
@@ -119,27 +119,27 @@ bb:
; CHECK: br i1 %exit_cond, label %exit, label %loop
define amdgpu_kernel void @loop() #0 {
entry:
- %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)*
- %end = getelementptr float, float addrspace(4)* %p, i64 10
+ %p = addrspacecast [10 x float] addrspace(3)* @array to float*
+ %end = getelementptr float, float* %p, i64 10
br label %loop
loop: ; preds = %loop, %entry
- %i = phi float addrspace(4)* [ %p, %entry ], [ %i2, %loop ]
- %v = load float, float addrspace(4)* %i
+ %i = phi float* [ %p, %entry ], [ %i2, %loop ]
+ %v = load float, float* %i
call void @use(float %v)
- %i2 = getelementptr float, float addrspace(4)* %i, i64 1
- %exit_cond = icmp eq float addrspace(4)* %i2, %end
+ %i2 = getelementptr float, float* %i, i64 1
+ %exit_cond = icmp eq float* %i2, %end
br i1 %exit_cond, label %exit, label %loop
exit: ; preds = %loop
ret void
}
-@generic_end = external addrspace(1) global float addrspace(4)*
+@generic_end = external addrspace(1) global float*
; CHECK-LABEL: @loop_with_generic_bound(
; CHECK: %p = bitcast [10 x float] addrspace(3)* @array to float addrspace(3)*
-; CHECK: %end = load float addrspace(4)*, float addrspace(4)* addrspace(1)* @generic_end
+; CHECK: %end = load float*, float* addrspace(1)* @generic_end
; CHECK: br label %loop
; CHECK: loop:
@@ -147,21 +147,21 @@ exit:
; CHECK: %v = load float, float addrspace(3)* %i
; CHECK: call void @use(float %v)
; CHECK: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
-; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float addrspace(4)*
-; CHECK: %exit_cond = icmp eq float addrspace(4)* %0, %end
+; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float*
+; CHECK: %exit_cond = icmp eq float* %0, %end
; CHECK: br i1 %exit_cond, label %exit, label %loop
define amdgpu_kernel void @loop_with_generic_bound() #0 {
entry:
- %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)*
- %end = load float addrspace(4)*, float addrspace(4)* addrspace(1)* @generic_end
+ %p = addrspacecast [10 x float] addrspace(3)* @array to float*
+ %end = load float*, float* addrspace(1)* @generic_end
br label %loop
loop: ; preds = %loop, %entry
- %i = phi float addrspace(4)* [ %p, %entry ], [ %i2, %loop ]
- %v = load float, float addrspace(4)* %i
+ %i = phi float* [ %p, %entry ], [ %i2, %loop ]
+ %v = load float, float* %i
call void @use(float %v)
- %i2 = getelementptr float, float addrspace(4)* %i, i64 1
- %exit_cond = icmp eq float addrspace(4)* %i2, %end
+ %i2 = getelementptr float, float* %i, i64 1
+ %exit_cond = icmp eq float* %i2, %end
br i1 %exit_cond, label %exit, label %loop
exit: ; preds = %loop
Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll Fri Feb 2 08:07:16 2018
@@ -8,9 +8,9 @@
; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8
; CHECK-NEXT: ret void
define void @addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)*
store i32 8, i32 addrspace(3)* %asc1, align 8
ret void
}
@@ -21,9 +21,9 @@ define void @addrspacecast_gep_addrspace
; CHECK-NEXT: store i8 8, i8 addrspace(3)* [[CAST]], align 8
; CHECK-NEXT: ret void
define void @addrspacecast_different_pointee_type(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i8 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i8 addrspace(3)*
store i8 8, i8 addrspace(3)* %asc1, align 8
ret void
}
@@ -33,24 +33,24 @@ define void @addrspacecast_different_poi
; CHECK-NEXT: store volatile i32 addrspace(3)* %gep0, i32 addrspace(3)* addrspace(1)* undef
; CHECK-NEXT: ret void
define void @addrspacecast_to_memory(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)*
store volatile i32 addrspace(3)* %asc1, i32 addrspace(3)* addrspace(1)* undef
ret void
}
; CHECK-LABEL: @multiuse_addrspacecast_gep_addrspacecast(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
-; CHECK-NEXT: store volatile i32 addrspace(4)* %1, i32 addrspace(4)* addrspace(1)* undef
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %ptr to i32*
+; CHECK-NEXT: store volatile i32* %1, i32* addrspace(1)* undef
; CHECK-NEXT: %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9
; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8
; CHECK-NEXT: ret void
define void @multiuse_addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- store volatile i32 addrspace(4)* %asc0, i32 addrspace(4)* addrspace(1)* undef
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ store volatile i32* %asc0, i32* addrspace(1)* undef
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)*
store i32 8, i32 addrspace(3)* %asc1, align 8
ret void
}
Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll Fri Feb 2 08:07:16 2018
@@ -9,8 +9,8 @@
; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double addrspace(4)*), i64 %idx0
- %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double*), i64 %idx0
+ %asc = addrspacecast double* %gep0 to double addrspace(3)*
store double 1.000000e+00, double addrspace(3)* %asc, align 8
ret void
}
@@ -19,8 +19,8 @@ define void @simplified_constexpr_gep_ad
; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
- %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0
+ %asc = addrspacecast double* %gep0 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc, align 8
ret void
}
@@ -30,27 +30,27 @@ define void @constexpr_gep_addrspacecast
; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(3)* %gep0, i64 %idx1
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
- %gep1 = getelementptr inbounds double, double addrspace(4)* %gep0, i64 %idx1
- %asc = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0
+ %gep1 = getelementptr inbounds double, double* %gep0, i64 %idx1
+ %asc = addrspacecast double* %gep1 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc, align 8
ret void
}
; Don't crash
; CHECK-LABEL: @vector_gep(
-; CHECK: %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32] addrspace(4)*>
+; CHECK: %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32]*>
define amdgpu_kernel void @vector_gep(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
- %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32] addrspace(4)*>
- %p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(4)*> %cast, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
- %p0 = extractelement <4 x i32 addrspace(4)*> %p, i32 0
- %p1 = extractelement <4 x i32 addrspace(4)*> %p, i32 1
- %p2 = extractelement <4 x i32 addrspace(4)*> %p, i32 2
- %p3 = extractelement <4 x i32 addrspace(4)*> %p, i32 3
- store i32 99, i32 addrspace(4)* %p0
- store i32 99, i32 addrspace(4)* %p1
- store i32 99, i32 addrspace(4)* %p2
- store i32 99, i32 addrspace(4)* %p3
+ %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32]*>
+ %p = getelementptr [1024 x i32], <4 x [1024 x i32]*> %cast, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+ %p0 = extractelement <4 x i32*> %p, i32 0
+ %p1 = extractelement <4 x i32*> %p, i32 1
+ %p2 = extractelement <4 x i32*> %p, i32 2
+ %p3 = extractelement <4 x i32*> %p, i32 3
+ store i32 99, i32* %p0
+ store i32 99, i32* %p1
+ store i32 99, i32* %p2
+ store i32 99, i32* %p3
ret void
}
@@ -61,12 +61,12 @@ define amdgpu_kernel void @vector_gep(<4
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
; CHECK-NEXT: ret void
define void @repeated_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
- %asc0 = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0
+ %asc0 = addrspacecast double* %gep0 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc0, align 8
- %gep1 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx1
- %asc1 = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
+ %gep1 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx1
+ %asc1 = addrspacecast double* %gep1 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc1, align 8
ret void
Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll Fri Feb 2 08:07:16 2018
@@ -3,143 +3,143 @@
; CHECK-LABEL: @objectsize_group_to_flat_i32(
; CHECK: %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true, i1 false)
define i32 @objectsize_group_to_flat_i32(i8 addrspace(3)* %group.ptr) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- %val = call i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)* %cast, i1 true, i1 false)
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ %val = call i32 @llvm.objectsize.i32.p0i8(i8* %cast, i1 true, i1 false)
ret i32 %val
}
; CHECK-LABEL: @objectsize_global_to_flat_i64(
; CHECK: %val = call i64 @llvm.objectsize.i64.p3i8(i8 addrspace(3)* %global.ptr, i1 true, i1 false)
define i64 @objectsize_global_to_flat_i64(i8 addrspace(3)* %global.ptr) #0 {
- %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8 addrspace(4)*
- %val = call i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)* %cast, i1 true, i1 false)
+ %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8*
+ %val = call i64 @llvm.objectsize.i64.p0i8(i8* %cast, i1 true, i1 false)
ret i64 %val
}
; CHECK-LABEL: @atomicinc_global_to_flat_i32(
; CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %y, i32 0, i32 0, i1 false)
define i32 @atomicinc_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %cast, i32 %y, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicinc_group_to_flat_i32(
; CHECK: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %y, i32 0, i32 0, i1 false)
define i32 @atomicinc_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %cast, i32 %y, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicinc_global_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y, i32 0, i32 0, i1 false)
define i64 @atomicinc_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @atomicinc_group_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y, i32 0, i32 0, i1 false)
define i64 @atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @atomicdec_global_to_flat_i32(
; CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %val, i32 0, i32 0, i1 false)
define i32 @atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicdec_group_to_flat_i32(
; CHECK: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %val, i32 0, i32 0, i1 false)
define i32 @atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicdec_global_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y, i32 0, i32 0, i1 false)
define i64 @atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @atomicdec_group_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y, i32 0, i32 0, i1 false
define i64 @atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @volatile_atomicinc_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
define i64 @volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
ret i64 %ret
}
; CHECK-LABEL: @volatile_atomicdec_global_to_flat_i32(
-; CHECK-NEXT: %1 = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
-; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %1, i32 %val, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %1, i32 %val, i32 0, i32 0, i1 true)
define i32 @volatile_atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true)
ret i32 %ret
}
; CHECK-LABEL: @volatile_atomicdec_group_to_flat_i32(
-; CHECK-NEXT: %1 = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
-; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %1, i32 %val, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %1, i32 %val, i32 0, i32 0, i1 true)
define i32 @volatile_atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true)
ret i32 %ret
}
; CHECK-LABEL: @volatile_atomicdec_global_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
define i64 @volatile_atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
ret i64 %ret
}
; CHECK-LABEL: @volatile_atomicdec_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
define i64 @volatile_atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
ret i64 %ret
}
; CHECK-LABEL: @invalid_variable_volatile_atomicinc_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 %volatile.var)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 %volatile.var)
define i64 @invalid_variable_volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y, i1 %volatile.var) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var)
ret i64 %ret
}
-declare i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)*, i1, i1) #1
-declare i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)*, i1, i1) #1
-declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) #1
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1) #1
+declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll Fri Feb 2 08:07:16 2018
@@ -3,100 +3,100 @@
; CHECK-LABEL: @memset_group_to_flat(
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memset_global_to_flat(
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 %global.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memset_group_to_flat_no_md(
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 4, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 %size, i1 false)
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 %size, i1 false)
ret void
}
; CHECK-LABEL: @memset_global_to_flat_no_md(
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 %global.ptr, i8 4, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 {
- %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 %size, i1 false)
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 %size, i1 false)
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group(
-; CHCK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+; CHCK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group(
-; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8 addrspace(4)* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 {
- %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+; CHECK: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8* %src.ptr, i64 %size) #0 {
+ %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group(
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 4 %src.group.ptr, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global(
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8 addrspace(1)* align 4 %src.global.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)*
- %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8*
+ %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global(
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %dest.global.ptr, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 {
- %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8*
+ call void @llvm.memcpy.p0i8.p3i8.i32(i8* align 4 %cast.dest, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !7
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !7
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !7
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !7
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md(
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false)
ret void
}
; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest0, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest1, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
-define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest0, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false)
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest1, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest0, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest1, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
+define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8* %dest0, i8* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest0, i8* align 4 %cast.src, i64 %size, i1 false)
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest1, i8* align 4 %cast.src, i64 %size, i1 false)
ret void
}
@@ -104,22 +104,22 @@ define amdgpu_kernel void @multiple_memc
; CHECK-LABEL: @memcpy_group_flat_to_flat_self(
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 addrspace(3)* align 4 %group.ptr, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 addrspace(4)* align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast, i8* align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group(
-; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+; CHECK: call void @llvm.memmove.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memmove.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
-declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i1) #1
-declare void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i1) #1
-declare void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i1) #1
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
+declare void @llvm.memcpy.p4i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+declare void @llvm.memcpy.p0i8.p3i8.i32(i8* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i1) #1
+declare void @llvm.memmove.p4i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }
Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll Fri Feb 2 08:07:16 2018
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+; RUN: opt -data-layout=A5 -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
; Regression tests from old HSAIL addrspacecast optimization pass
@@ -14,7 +14,7 @@ entry:
%tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
- %vecload1 = load <2 x double>, <2 x double> addrspace(4)* bitcast (double addrspace(4)* getelementptr ([100 x double], [100 x double] addrspace(4)* addrspacecast ([100 x double] addrspace(1)* @data to [100 x double] addrspace(4)*), i64 0, i64 4) to <2 x double> addrspace(4)*), align 8
+ %vecload1 = load <2 x double>, <2 x double>* bitcast (double* getelementptr ([100 x double], [100 x double]* addrspacecast ([100 x double] addrspace(1)* @data to [100 x double]*), i64 0, i64 4) to <2 x double>*), align 8
%cmp = fcmp ord <2 x double> %vecload1, zeroinitializer
%sext = sext <2 x i1> %cmp to <2 x i64>
%tmp4 = extractelement <2 x i64> %sext, i64 0
@@ -30,7 +30,7 @@ entry:
@generic_address_bug9749.val = internal addrspace(1) global float 0.0, align 4
-declare i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)*)
+declare i32 @_Z9get_fencePv(i8*)
%opencl.pipe_t = type opaque
; This is a compile time assert bug, but we still want to check optimization
@@ -53,24 +53,24 @@ entry:
; Should generate flat load
; CHECK-LABEL: @generic_address_bug9749(
; CHECK: br i1
-; CHECK: load float, float addrspace(4)*
+; CHECK: load float, float*
; CHECK: br label
define amdgpu_kernel void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 {
entry:
- %ptr = alloca float addrspace(4)*, align 8
+ %ptr = alloca float*, align 8, addrspace(5)
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
store float 0x3FB99999A0000000, float addrspace(1)* @generic_address_bug9749.val, align 4
- store volatile float addrspace(4)* addrspacecast (float addrspace(1)* @generic_address_bug9749.val to float addrspace(4)*), float addrspace(4)** %ptr, align 8
- %tmp2 = load volatile float addrspace(4)*, float addrspace(4)** %ptr, align 8
+ store volatile float* addrspacecast (float addrspace(1)* @generic_address_bug9749.val to float*), float* addrspace(5)* %ptr, align 8
+ %tmp2 = load volatile float*, float* addrspace(5)* %ptr, align 8
%tmp3 = load float, float addrspace(1)* @generic_address_bug9749.val, align 4
- %tmp4 = bitcast float addrspace(4)* %tmp2 to i8 addrspace(4)*
- %call.i = call i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)* %tmp4) #1
+ %tmp4 = bitcast float* %tmp2 to i8*
+ %call.i = call i32 @_Z9get_fencePv(i8* %tmp4) #1
%switch.i.i = icmp ult i32 %call.i, 4
br i1 %switch.i.i, label %if.end.i, label %helperFunction.exit
if.end.i: ; preds = %entry
- %tmp5 = load float, float addrspace(4)* %tmp2, align 4
+ %tmp5 = load float, float* %tmp2, align 4
%not.cmp.i = fcmp oeq float %tmp5, %tmp3
%phitmp = zext i1 %not.cmp.i to i32
br label %helperFunction.exit
@@ -91,14 +91,14 @@ entry:
br i1 %cmp1, label %for.end, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
- %tmp = addrspacecast i32 addrspace(3)* %in to i32 addrspace(4)*
+ %tmp = addrspacecast i32 addrspace(3)* %in to i32*
br label %for.body
for.body: ; preds = %for.body, %for.body.lr.ph
%i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
- %ptr.02 = phi i32 addrspace(4)* [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
- store i32 %i.03, i32 addrspace(4)* %ptr.02, align 4
- %add.ptr = getelementptr inbounds i32, i32 addrspace(4)* %ptr.02, i64 4
+ %ptr.02 = phi i32* [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
+ store i32 %i.03, i32* %ptr.02, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %ptr.02, i64 4
%inc = add nuw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, %numElems
br i1 %exitcond, label %for.end, label %for.body
@@ -116,23 +116,23 @@ entry:
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
%sext = shl i64 %tmp3, 32
- %tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32 addrspace(4)*
- %tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32 addrspace(4)*
+ %tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32*
+ %tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32*
%tmp6 = ashr exact i64 %sext, 31
- %tmp7 = getelementptr inbounds i32, i32 addrspace(4)* %tmp5, i64 %tmp6
- %arrayidx_v4 = bitcast i32 addrspace(4)* %tmp7 to <2 x i32> addrspace(4)*
- %vecload = load <2 x i32>, <2 x i32> addrspace(4)* %arrayidx_v4, align 4
+ %tmp7 = getelementptr inbounds i32, i32* %tmp5, i64 %tmp6
+ %arrayidx_v4 = bitcast i32* %tmp7 to <2 x i32>*
+ %vecload = load <2 x i32>, <2 x i32>* %arrayidx_v4, align 4
%tmp8 = extractelement <2 x i32> %vecload, i32 0
%tmp9 = extractelement <2 x i32> %vecload, i32 1
%tmp10 = icmp eq i32 %tmp8, 0
%tmp11 = select i1 %tmp10, i32 32, i32 %tmp8
%tmp12 = icmp eq i32 %tmp9, 0
%tmp13 = select i1 %tmp12, i32 32, i32 %tmp9
- %tmp14 = getelementptr inbounds i32, i32 addrspace(4)* %tmp4, i64 %tmp6
+ %tmp14 = getelementptr inbounds i32, i32* %tmp4, i64 %tmp6
%tmp15 = insertelement <2 x i32> undef, i32 %tmp11, i32 0
%tmp16 = insertelement <2 x i32> %tmp15, i32 %tmp13, i32 1
- %arrayidx_v41 = bitcast i32 addrspace(4)* %tmp14 to <2 x i32> addrspace(4)*
- store <2 x i32> %tmp16, <2 x i32> addrspace(4)* %arrayidx_v41, align 4
+ %arrayidx_v41 = bitcast i32* %tmp14 to <2 x i32>*
+ store <2 x i32> %tmp16, <2 x i32>* %arrayidx_v41, align 4
ret void
}
@@ -140,4 +140,4 @@ declare i32 @llvm.amdgcn.workitem.id.x()
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
\ No newline at end of file
+attributes #2 = { nounwind readnone }
Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/select.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/select.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/select.ll Fri Feb 2 08:07:16 2018
@@ -4,25 +4,25 @@
; this doesn't do something insane on non-canonical IR.
; CHECK-LABEL: @return_select_group_flat(
-; CHECK-NEXT: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK-NEXT: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
-; CHECK-NEXT: %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
-; CHECK-NEXT: ret i32 addrspace(4)* %select
-define i32 addrspace(4)* @return_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
- ret i32 addrspace(4)* %select
+; CHECK-NEXT: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK-NEXT: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+; CHECK-NEXT: %select = select i1 %c, i32* %cast0, i32* %cast1
+; CHECK-NEXT: ret i32* %select
+define i32* @return_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1
+ ret i32* %select
}
; CHECK-LABEL: @store_select_group_flat(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1
+ store i32 -1, i32* %select
ret void
}
@@ -31,23 +31,23 @@ define amdgpu_kernel void @store_select_
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1, !prof !0
; CHECK: %load = load i32, i32 addrspace(3)* %select
define i32 @load_select_group_flat_md(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1, !prof !0
- %load = load i32, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1, !prof !0
+ %load = load i32, i32* %select
ret i32 %load
}
; CHECK-LABEL: @store_select_mismatch_group_private_flat(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %2 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* %2
-; CHECK: store i32 -1, i32 addrspace(4)* %select
-define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32* %private.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
- store i32 -1, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %2 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
+; CHECK: %select = select i1 %c, i32* %1, i32* %2
+; CHECK: store i32 -1, i32* %select
+define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(5)* %private.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1
+ store i32 -1, i32* %select
ret void
}
@@ -58,35 +58,35 @@ define amdgpu_kernel void @store_select_
; CHECK: %tmp = load i32, i32 addrspace(3)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(3)* @lds0, i32 addrspace(3)* @lds1)
define i32 @constexpr_select_group_flat() #0 {
bb:
- %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds1 to i32 addrspace(4)*))
+ %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*))
ret i32 %tmp
}
; CHECK-LABEL: @constexpr_select_group_global_flat_mismatch(
-; CHECK: %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*))
+; CHECK: %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
define i32 @constexpr_select_group_global_flat_mismatch() #0 {
bb:
- %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*))
+ %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
ret i32 %tmp
}
; CHECK-LABEL: @store_select_group_flat_null(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* null
+ store i32 -1, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_flat_null_swap(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0
+; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* null, i32 addrspace(4)* %cast0
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* null, i32* %cast0
+ store i32 -1, i32* %select
ret void
}
@@ -94,9 +94,9 @@ define amdgpu_kernel void @store_select_
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* undef
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* undef
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* undef
+ store i32 -1, i32* %select
ret void
}
@@ -104,21 +104,21 @@ define amdgpu_kernel void @store_select_
; CHECK: %select = select i1 %c, i32 addrspace(3)* undef, i32 addrspace(3)* %group.ptr.0
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* undef, i32 addrspace(4)* %cast0
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* undef, i32* %cast0
+ store i32 -1, i32* %select
ret void
}
; CHECK-LABEL: @store_select_gep_group_flat_null(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
; CHECK: %gep = getelementptr i32, i32 addrspace(3)* %select, i64 16
; CHECK: store i32 -1, i32 addrspace(3)* %gep
define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null
- %gep = getelementptr i32, i32 addrspace(4)* %select, i64 16
- store i32 -1, i32 addrspace(4)* %gep
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* null
+ %gep = getelementptr i32, i32* %select, i64 16
+ store i32 -1, i32* %gep
ret void
}
@@ -128,19 +128,19 @@ define amdgpu_kernel void @store_select_
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* @lds1
; CHECK: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds1 to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_flat_inttoptr_flat(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*) to i32 addrspace(3)*)
+; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* inttoptr (i64 12345 to i32*) to i32 addrspace(3)*)
; CHECK: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* inttoptr (i64 12345 to i32*)
+ store i32 7, i32* %select
ret void
}
@@ -148,114 +148,114 @@ define amdgpu_kernel void @store_select_
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*)
; CHECK-NEXT: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* %1, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr_swap(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %1
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %1
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %cast0
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %cast0
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_null_null(
-; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
- %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_null_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_null_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_gv_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_gv_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_null_gv_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_null_gv_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_flat_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4
+; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32* inttoptr (i64 123 to i32*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* inttoptr (i64 123 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr(
; CHECK: store i32 7, i32 addrspace(3)* null
define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* undef to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* undef to i32*)), align 4
ret void
}
@lds2 = external addrspace(3) global [1024 x i32], align 4
; CHECK-LABEL: @store_select_group_constexpr_ptrtoint(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*)
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* %1, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_flat_vector(
-; CHECK: %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*>
-; CHECK: %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*>
-; CHECK: %select = select i1 %c, <2 x i32 addrspace(4)*> %cast0, <2 x i32 addrspace(4)*> %cast1
-; CHECK: %extract0 = extractelement <2 x i32 addrspace(4)*> %select, i32 0
-; CHECK: %extract1 = extractelement <2 x i32 addrspace(4)*> %select, i32 1
-; CHECK: store i32 -1, i32 addrspace(4)* %extract0
-; CHECK: store i32 -2, i32 addrspace(4)* %extract1
+; CHECK: %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
+; CHECK: %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
+; CHECK: %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
+; CHECK: %extract0 = extractelement <2 x i32*> %select, i32 0
+; CHECK: %extract1 = extractelement <2 x i32*> %select, i32 1
+; CHECK: store i32 -1, i32* %extract0
+; CHECK: store i32 -2, i32* %extract1
define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 {
- %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*>
- %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*>
- %select = select i1 %c, <2 x i32 addrspace(4)*> %cast0, <2 x i32 addrspace(4)*> %cast1
- %extract0 = extractelement <2 x i32 addrspace(4)*> %select, i32 0
- %extract1 = extractelement <2 x i32 addrspace(4)*> %select, i32 1
- store i32 -1, i32 addrspace(4)* %extract0
- store i32 -2, i32 addrspace(4)* %extract1
+ %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
+ %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
+ %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
+ %extract0 = extractelement <2 x i32*> %select, i32 0
+ %extract1 = extractelement <2 x i32*> %select, i32 1
+ store i32 -1, i32* %extract0
+ store i32 -2, i32* %extract1
ret void
}
Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll Fri Feb 2 08:07:16 2018
@@ -3,138 +3,138 @@
; Check that volatile users of addrspacecast are not replaced.
; CHECK-LABEL: @volatile_load_flat_from_global(
-; CHECK: load volatile i32, i32 addrspace(4)*
+; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_load_flat_from_constant(
-; CHECK: load volatile i32, i32 addrspace(4)*
+; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(2)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(2)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_load_flat_from_group(
-; CHECK: load volatile i32, i32 addrspace(4)*
+; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(3)*
define amdgpu_kernel void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_load_flat_from_private(
-; CHECK: load volatile i32, i32 addrspace(4)*
-; CHECK: store i32 %val, i32*
-define amdgpu_kernel void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocapture %output) #0 {
- %tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+; CHECK: load volatile i32, i32*
+; CHECK: store i32 %val, i32 addrspace(5)*
+define amdgpu_kernel void @volatile_load_flat_from_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
+ %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_store_flat_to_global(
; CHECK: load i32, i32 addrspace(1)*
-; CHECK: store volatile i32 %val, i32 addrspace(4)*
+; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_store_flat_to_group(
; CHECK: load i32, i32 addrspace(3)*
-; CHECK: store volatile i32 %val, i32 addrspace(4)*
+; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_store_flat_to_private(
-; CHECK: load i32, i32*
-; CHECK: store volatile i32 %val, i32 addrspace(4)*
-define amdgpu_kernel void @volatile_store_flat_to_private(i32* nocapture %input, i32* nocapture %output) #0 {
- %tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+; CHECK: load i32, i32 addrspace(5)*
+; CHECK: store volatile i32 %val, i32*
+define amdgpu_kernel void @volatile_store_flat_to_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
+ %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_atomicrmw_add_group_to_flat(
-; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
-; CHECK: atomicrmw volatile add i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK: atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @volatile_atomicrmw_add_global_to_flat(
-; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
-; CHECK: %ret = atomicrmw volatile add i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK: %ret = atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @volatile_cmpxchg_global_to_flat(
-; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
-; CHECK: cmpxchg volatile i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; CHECK-LABEL: @volatile_cmpxchg_group_to_flat(
-; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
-; CHECK: cmpxchg volatile i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; FIXME: Shouldn't be losing names
; CHECK-LABEL: @volatile_memset_group_to_flat(
-; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
-; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %1, i8 4, i64 32, i1 true)
+; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 true)
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
ret void
}
; CHECK-LABEL: @volatile_memset_global_to_flat(
-; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
-; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %1, i8 4, i64 32, i1 true)
+; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 true)
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
ret void
}
-declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i1) #1
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }
Modified: llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll (original)
+++ llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll Fri Feb 2 08:07:16 2018
@@ -1,38 +1,37 @@
-; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
-; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
+; RUN: opt -data-layout=A5 -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
+; RUN: opt -data-layout=A5 -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; ALL-LABEL: @load_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
-; UNALIGNED: load <2 x i8>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i8>, <2 x i8> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i8, i8* %ptr0, align 1{{$}}
-; ALIGNED: load i8, i8* %ptr1, align 1{{$}}
+; ALIGNED: load i8, i8 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: load i8, i8 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i8], align 1
- %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
- %val0 = load i8, i8* %ptr0, align 1
- %ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
- %val1 = load i8, i8* %ptr1, align 1
+ %alloca = alloca [128 x i8], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i8, i8 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i8, i8 addrspace(5)* %ptr0, i32 1
+ %val1 = load i8, i8 addrspace(5)* %ptr1, align 1
%add = add i8 %val0, %val1
store i8 %add, i8 addrspace(1)* %out
ret void
}
; ALL-LABEL: @load_unknown_offset_align1_i16(
-; ALL: alloca [128 x i16], align 1{{$}}
-; UNALIGNED: load <2 x i16>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}
+; ALL: alloca [128 x i16], align 1, addrspace(5){{$}}
+; UNALIGNED: load <2 x i16>, <2 x i16> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i16, i16* %ptr0, align 1{{$}}
-; ALIGNED: load i16, i16* %ptr1, align 1{{$}}
+; ALIGNED: load i16, i16 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: load i16, i16 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i16], align 1
- %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
- %val0 = load i16, i16* %ptr0, align 1
- %ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
- %val1 = load i16, i16* %ptr1, align 1
+ %alloca = alloca [128 x i16], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i16], [128 x i16] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i16, i16 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i16, i16 addrspace(5)* %ptr0, i32 1
+ %val1 = load i16, i16 addrspace(5)* %ptr1, align 1
%add = add i16 %val0, %val1
store i16 %add, i16 addrspace(1)* %out
ret void
@@ -43,16 +42,16 @@ define amdgpu_kernel void @load_unknown_
; ALL-LABEL: @load_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1
-; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i32, i32* %ptr0, align 1
-; ALIGNED: load i32, i32* %ptr1, align 1
+; ALIGNED: load i32, i32 addrspace(5)* %ptr0, align 1
+; ALIGNED: load i32, i32 addrspace(5)* %ptr1, align 1
define amdgpu_kernel void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 1
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- %val0 = load i32, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- %val1 = load i32, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i32, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ %val1 = load i32, i32 addrspace(5)* %ptr1, align 1
%add = add i32 %val0, %val1
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -63,17 +62,17 @@ define amdgpu_kernel void @load_unknown_
; ALL-LABEL: @load_alloca16_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 16
-; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
; FIXME: Should change alignment
; ALIGNED: load i32
; ALIGNED: load i32
define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 16
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- %val0 = load i32, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- %val1 = load i32, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 16, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i32, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ %val1 = load i32, i32 addrspace(5)* %ptr1, align 1
%add = add i32 %val0, %val1
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -81,31 +80,31 @@ define amdgpu_kernel void @load_alloca16
; ALL-LABEL: @store_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
-; UNALIGNED: store <2 x i8> <i8 9, i8 10>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i8> <i8 9, i8 10>, <2 x i8> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i8 9, i8* %ptr0, align 1{{$}}
-; ALIGNED: store i8 10, i8* %ptr1, align 1{{$}}
+; ALIGNED: store i8 9, i8 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: store i8 10, i8 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i8], align 1
- %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
- store i8 9, i8* %ptr0, align 1
- %ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
- store i8 10, i8* %ptr1, align 1
+ %alloca = alloca [128 x i8], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i8 9, i8 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i8, i8 addrspace(5)* %ptr0, i32 1
+ store i8 10, i8 addrspace(5)* %ptr1, align 1
ret void
}
; ALL-LABEL: @store_unknown_offset_align1_i16(
; ALL: alloca [128 x i16], align 1
-; UNALIGNED: store <2 x i16> <i16 9, i16 10>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i16> <i16 9, i16 10>, <2 x i16> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i16 9, i16* %ptr0, align 1{{$}}
-; ALIGNED: store i16 10, i16* %ptr1, align 1{{$}}
+; ALIGNED: store i16 9, i16 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: store i16 10, i16 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i16], align 1
- %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
- store i16 9, i16* %ptr0, align 1
- %ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
- store i16 10, i16* %ptr1, align 1
+ %alloca = alloca [128 x i16], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i16], [128 x i16] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i16 9, i16 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i16, i16 addrspace(5)* %ptr0, i32 1
+ store i16 10, i16 addrspace(5)* %ptr1, align 1
ret void
}
@@ -115,16 +114,16 @@ define amdgpu_kernel void @store_unknown
; ALL-LABEL: @store_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1
-; UNALIGNED: store <2 x i32> <i32 9, i32 10>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i32> <i32 9, i32 10>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i32 9, i32* %ptr0, align 1
-; ALIGNED: store i32 10, i32* %ptr1, align 1
+; ALIGNED: store i32 9, i32 addrspace(5)* %ptr0, align 1
+; ALIGNED: store i32 10, i32 addrspace(5)* %ptr1, align 1
define amdgpu_kernel void @store_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 1
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- store i32 9, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- store i32 10, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i32 9, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ store i32 10, i32 addrspace(5)* %ptr1, align 1
ret void
}
Modified: llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll (original)
+++ llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll Fri Feb 2 08:07:16 2018
@@ -5,7 +5,6 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-8,+unaligned-scratch-access -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=ELT8,ELT8-UNALIGNED,UNALIGNED,ALL %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-16,+unaligned-scratch-access -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=ELT16,ELT16-UNALIGNED,UNALIGNED,ALL %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32
; ALIGNED: store i32
@@ -17,52 +16,52 @@ target datalayout = "e-p:32:32-p1:64:64-
; ELT8-UNALIGNED: store <2 x i32>
; ELT16-UNALIGNED: store <4 x i32>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
-
- store i32 9, i32* %out
- store i32 1, i32* %out.gep.1
- store i32 23, i32* %out.gep.2
- store i32 19, i32* %out.gep.3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
+
+ store i32 9, i32 addrspace(5)* %out
+ store i32 1, i32 addrspace(5)* %out.gep.1
+ store i32 23, i32 addrspace(5)* %out.gep.2
+ store i32 19, i32 addrspace(5)* %out.gep.3
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32_align1(
-; ALIGNED: store i32 9, i32* %out, align 1
-; ALIGNED: store i32 1, i32* %out.gep.1, align 1
-; ALIGNED: store i32 23, i32* %out.gep.2, align 1
-; ALIGNED: store i32 19, i32* %out.gep.3, align 1
+; ALIGNED: store i32 9, i32 addrspace(5)* %out, align 1
+; ALIGNED: store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+; ALIGNED: store i32 23, i32 addrspace(5)* %out.gep.2, align 1
+; ALIGNED: store i32 19, i32 addrspace(5)* %out.gep.3, align 1
-; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32>* %1, align 1
+; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32> addrspace(5)* %1, align 1
-; ELT8-UNALIGNED: store <2 x i32> <i32 9, i32 1>, <2 x i32>* %1, align 1
-; ELT8-UNALIGNED: store <2 x i32> <i32 23, i32 19>, <2 x i32>* %2, align 1
+; ELT8-UNALIGNED: store <2 x i32> <i32 9, i32 1>, <2 x i32> addrspace(5)* %1, align 1
+; ELT8-UNALIGNED: store <2 x i32> <i32 23, i32 19>, <2 x i32> addrspace(5)* %2, align 1
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
- store i32 9, i32* %out, align 1
- store i32 1, i32* %out.gep.1, align 1
- store i32 23, i32* %out.gep.2, align 1
- store i32 19, i32* %out.gep.3, align 1
+ store i32 9, i32 addrspace(5)* %out, align 1
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 1
+ store i32 19, i32 addrspace(5)* %out.gep.3, align 1
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32_align2(
-; ALIGNED: store i32 9, i32* %out, align 2
-; ALIGNED: store i32 1, i32* %out.gep.1, align 2
-; ALIGNED: store i32 23, i32* %out.gep.2, align 2
-; ALIGNED: store i32 19, i32* %out.gep.3, align 2
+; ALIGNED: store i32 9, i32 addrspace(5)* %out, align 2
+; ALIGNED: store i32 1, i32 addrspace(5)* %out.gep.1, align 2
+; ALIGNED: store i32 23, i32 addrspace(5)* %out.gep.2, align 2
+; ALIGNED: store i32 19, i32 addrspace(5)* %out.gep.3, align 2
-; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32>* %1, align 2
+; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32> addrspace(5)* %1, align 2
; ELT8-UNALIGNED: store <2 x i32>
; ELT8-UNALIGNED: store <2 x i32>
@@ -71,29 +70,29 @@ define amdgpu_kernel void @merge_private
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
- store i32 9, i32* %out, align 2
- store i32 1, i32* %out.gep.1, align 2
- store i32 23, i32* %out.gep.2, align 2
- store i32 19, i32* %out.gep.3, align 2
+ store i32 9, i32 addrspace(5)* %out, align 2
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 2
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 2
+ store i32 19, i32 addrspace(5)* %out.gep.3, align 2
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i8(
; ALL: store <4 x i8>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i32 1
- %out.gep.2 = getelementptr i8, i8* %out, i32 2
- %out.gep.3 = getelementptr i8, i8* %out, i32 3
-
- store i8 9, i8* %out, align 4
- store i8 1, i8* %out.gep.1
- store i8 23, i8* %out.gep.2
- store i8 19, i8* %out.gep.3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(5)* %out, i32 3
+
+ store i8 9, i8 addrspace(5)* %out, align 4
+ store i8 1, i8 addrspace(5)* %out.gep.1
+ store i8 23, i8 addrspace(5)* %out.gep.2
+ store i8 19, i8 addrspace(5)* %out.gep.3
ret void
}
@@ -103,26 +102,26 @@ define amdgpu_kernel void @merge_private
; ALIGNED: store i8
; ALIGNED: store i8
-; UNALIGNED: store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, <4 x i8>* %1, align 1
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i32 1
- %out.gep.2 = getelementptr i8, i8* %out, i32 2
- %out.gep.3 = getelementptr i8, i8* %out, i32 3
-
- store i8 9, i8* %out, align 1
- store i8 1, i8* %out.gep.1, align 1
- store i8 23, i8* %out.gep.2, align 1
- store i8 19, i8* %out.gep.3, align 1
+; UNALIGNED: store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, <4 x i8> addrspace(5)* %1, align 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(5)* %out, i32 3
+
+ store i8 9, i8 addrspace(5)* %out, align 1
+ store i8 1, i8 addrspace(5)* %out.gep.1, align 1
+ store i8 23, i8 addrspace(5)* %out.gep.2, align 1
+ store i8 19, i8 addrspace(5)* %out.gep.3, align 1
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16(
; ALL: store <2 x i16>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 4
- store i16 12, i16* %out.gep.1
+ store i16 9, i16 addrspace(5)* %out, align 4
+ store i16 12, i16 addrspace(5)* %out.gep.1
ret void
}
@@ -130,12 +129,12 @@ define amdgpu_kernel void @merge_private
; ALIGNED: store i16
; ALIGNED: store i16
-; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 2
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 2
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 2
- store i16 12, i16* %out.gep.1, align 2
+ store i16 9, i16 addrspace(5)* %out, align 2
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 2
ret void
}
@@ -143,22 +142,22 @@ define amdgpu_kernel void @merge_private
; ALIGNED: store i16
; ALIGNED: store i16
-; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 1
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 1
- store i16 12, i16* %out.gep.1, align 1
+ store i16 9, i16 addrspace(5)* %out, align 1
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 1
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16_align8(
-; ALL: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 8
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; ALL: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 8
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 8
- store i16 12, i16* %out.gep.1, align 2
+ store i16 9, i16 addrspace(5)* %out, align 8
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 2
ret void
}
@@ -179,13 +178,13 @@ define amdgpu_kernel void @merge_private
; ELT16-ALIGNED: store i32
; ELT16-UNALIGNED: store <3 x i32>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
-
- store i32 9, i32* %out
- store i32 1, i32* %out.gep.1
- store i32 23, i32* %out.gep.2
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+
+ store i32 9, i32 addrspace(5)* %out
+ store i32 1, i32 addrspace(5)* %out.gep.1
+ store i32 23, i32 addrspace(5)* %out.gep.2
ret void
}
@@ -202,13 +201,13 @@ define amdgpu_kernel void @merge_private
; ELT8-UNALIGNED: store i32
; ELT16-UNALIGNED: store <3 x i32>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
-
- store i32 9, i32* %out, align 1
- store i32 1, i32* %out.gep.1, align 1
- store i32 23, i32* %out.gep.2, align 1
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+
+ store i32 9, i32 addrspace(5)* %out, align 1
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 1
ret void
}
@@ -218,13 +217,13 @@ define amdgpu_kernel void @merge_private
; ALIGNED: store i8
; UNALIGNED: store <3 x i8>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i8 1
- %out.gep.2 = getelementptr i8, i8* %out, i8 2
-
- store i8 9, i8* %out, align 1
- store i8 1, i8* %out.gep.1, align 1
- store i8 23, i8* %out.gep.2, align 1
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i8 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i8 2
+
+ store i8 9, i8 addrspace(5)* %out, align 1
+ store i8 1, i8 addrspace(5)* %out.gep.1, align 1
+ store i8 23, i8 addrspace(5)* %out.gep.2, align 1
ret void
}
Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll Fri Feb 2 08:07:16 2018
@@ -1,6 +1,5 @@
; RUN: llc < %s | FileCheck %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; We need to compile this for a target where we have different address spaces,
@@ -21,9 +20,9 @@ entry:
loop:
%idx0 = phi i32 [ %next_idx0, %loop ], [ 0, %entry ]
- %0 = getelementptr inbounds i32, i32* null, i32 %idx0
+ %0 = getelementptr inbounds i32, i32 addrspace(5)* null, i32 %idx0
%1 = getelementptr inbounds i32, i32 addrspace(1)* null, i32 %idx0
- store i32 1, i32* %0
+ store i32 1, i32 addrspace(5)* %0
store i32 7, i32 addrspace(1)* %1
%next_idx0 = add nuw nsw i32 %idx0, 1
br label %loop
Modified: llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll?rev=324101&r1=324100&r2=324101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll Fri Feb 2 08:07:16 2018
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=20000 %s | FileCheck %s
+; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=20000 %s | FileCheck %s
; Check that we full unroll loop to be able to eliminate alloca
; CHECK-LABEL: @non_invariant_ind
@@ -9,13 +9,13 @@
define amdgpu_kernel void @non_invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [64 x i32], align 4
+ %arr = alloca [64 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -27,8 +27,8 @@ for.body:
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -42,7 +42,7 @@ for.body:
define amdgpu_kernel void @invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [64 x i32], align 4
+ %arr = alloca [64 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.cond2.preheader
@@ -54,8 +54,8 @@ for.cond2.preheader:
br label %for.body6
for.cond.cleanup: ; preds = %for.cond.cleanup5
- %arrayidx13 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
- %tmp16 = load i32, i32* %arrayidx13, align 4
+ %arrayidx13 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp16 = load i32, i32 addrspace(5)* %arrayidx13, align 4
%arrayidx15 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp16, i32 addrspace(1)* %arrayidx15, align 4
ret void
@@ -69,8 +69,8 @@ for.body6:
%j.025 = phi i32 [ 0, %for.cond2.preheader ], [ %inc, %for.body6 ]
%add = add nsw i32 %j.025, %tmp1
%rem = srem i32 %add, 64
- %arrayidx8 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp15, i32* %arrayidx8, align 4
+ %arrayidx8 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp15, i32 addrspace(5)* %arrayidx8, align 4
%inc = add nuw nsw i32 %j.025, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup5, label %for.body6
@@ -84,13 +84,13 @@ for.body6:
define amdgpu_kernel void @too_big(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [256 x i32], align 4
+ %arr = alloca [256 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -102,8 +102,8 @@ for.body:
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -118,13 +118,13 @@ for.body:
define amdgpu_kernel void @dynamic_size_alloca(i32 addrspace(1)* nocapture %a, i32 %n, i32 %x) {
entry:
- %arr = alloca i32, i32 %n, align 4
+ %arr = alloca i32, i32 %n, align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds i32, i32* %arr, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds i32, i32 addrspace(5)* %arr, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -136,8 +136,8 @@ for.body:
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds i32, i32* %arr, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32 addrspace(5)* %arr, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
More information about the llvm-commits mailing list