[llvm] r325030 - [AMDGPU] Change constant addr space to 4
Yaxun Liu via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 13 10:00:27 PST 2018
Author: yaxunl
Date: Tue Feb 13 10:00:25 2018
New Revision: 325030
URL: http://llvm.org/viewvc/llvm-project?rev=325030&view=rev
Log:
[AMDGPU] Change constant addr space to 4

The constant address space number moves from 2 to 4 and the region (GDS)
address space from 4 to 2, making the documented "future default" mapping the
only mapping; the data layout string, intrinsic pointer types, and tests are
updated to match.

Differential Revision: https://reviews.llvm.org/D43170
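
As a sketch of what the renumbering means in practice (the kernel name is
hypothetical; the pattern matches the test updates below), a constant-memory
load that was previously written against addrspace(2) is now written against
addrspace(4):

  ; Previously (old numbering) the same kernel took i32 addrspace(2)* %ptr and
  ; loaded through i32 addrspace(2)*.
  define amdgpu_kernel void @read_const(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
    %v = load i32, i32 addrspace(4)* %ptr   ; uniform constant load (selected as s_load_dword in the smrd tests below)
    store i32 %v, i32 addrspace(1)* %out    ; result written to global memory
    ret void
  }
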
Modified:
llvm/trunk/docs/AMDGPUUsage.rst
llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll
llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll
llvm/trunk/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
llvm/trunk/test/CodeGen/AMDGPU/early-if-convert-cost.ll
llvm/trunk/test/CodeGen/AMDGPU/early-if-convert.ll
llvm/trunk/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
llvm/trunk/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
llvm/trunk/test/CodeGen/AMDGPU/fence-barrier.ll
llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll
llvm/trunk/test/CodeGen/AMDGPU/global-constant.ll
llvm/trunk/test/CodeGen/AMDGPU/gv-const-addrspace.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-func-align.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll
llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
llvm/trunk/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.memcpy.ll
llvm/trunk/test/CodeGen/AMDGPU/load-constant-f64.ll
llvm/trunk/test/CodeGen/AMDGPU/load-constant-i1.ll
llvm/trunk/test/CodeGen/AMDGPU/load-constant-i16.ll
llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll
llvm/trunk/test/CodeGen/AMDGPU/load-constant-i64.ll
llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll
llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll
llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll
llvm/trunk/test/CodeGen/AMDGPU/mad24-get-global-id.ll
llvm/trunk/test/CodeGen/AMDGPU/missing-store.ll
llvm/trunk/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll
llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
llvm/trunk/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll
llvm/trunk/test/CodeGen/AMDGPU/no-shrink-extloads.ll
llvm/trunk/test/CodeGen/AMDGPU/nullptr.ll
llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll
llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll
llvm/trunk/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
llvm/trunk/test/CodeGen/AMDGPU/readcyclecounter.ll
llvm/trunk/test/CodeGen/AMDGPU/ret.ll
llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll
llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll
llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll
llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll
llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll
llvm/trunk/test/CodeGen/AMDGPU/store-global.ll
llvm/trunk/test/CodeGen/AMDGPU/store-private.ll
llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll
llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll
llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll
llvm/trunk/test/CodeGen/AMDGPU/uniform-crash.ll
llvm/trunk/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
llvm/trunk/test/CodeGen/AMDGPU/wait.ll
llvm/trunk/test/CodeGen/AMDGPU/waitcnt-looptest.ll
llvm/trunk/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
Modified: llvm/trunk/docs/AMDGPUUsage.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/AMDGPUUsage.rst?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/docs/AMDGPUUsage.rst (original)
+++ llvm/trunk/docs/AMDGPUUsage.rst Tue Feb 13 10:00:25 2018
@@ -270,27 +270,17 @@ LLVM Address Space number is used throug
.. table:: Address Space Mapping
:name: amdgpu-address-space-mapping-table
- ================== ================= =================
+ ================== =================
LLVM Address Space Memory Space
- ------------------ -----------------------------------
- \ Current Default Future Default
- ================== ================= =================
- 0 Generic (Flat) Generic (Flat)
- 1 Global Global
- 2 Constant Region (GDS)
- 3 Local (group/LDS) Local (group/LDS)
- 4 Region (GDS) Constant
- 5 Private (Scratch) Private (Scratch)
- 6 Constant 32-bit Constant 32-bit
- ================== ================= =================
-
-Current Default
- This is the current default address space mapping used for all languages.
- This will shortly be deprecated.
-
-Future Default
- This will shortly be the only address space mapping for all languages using
- AMDGPU backend.
+ ================== =================
+ 0 Generic (Flat)
+ 1 Global
+ 2 Region (GDS)
+ 3 Local (group/LDS)
+ 4 Constant
+ 5 Private (Scratch)
+ 6 Constant 32-bit
+ ================== =================
.. _amdgpu-memory-scopes:
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Tue Feb 13 10:00:25 2018
@@ -83,22 +83,22 @@ defm int_amdgcn_workgroup_id : AMDGPURea
def int_amdgcn_dispatch_ptr :
GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_queue_ptr :
GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_kernarg_segment_ptr :
GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_implicitarg_ptr :
GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_groupstaticsize :
@@ -111,7 +111,7 @@ def int_amdgcn_dispatch_id :
def int_amdgcn_implicit_buffer_ptr :
GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[IntrNoMem, IntrSpeculatable]>;
// Set EXEC to the 64-bit value given.
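
For illustration, the pointer-returning AMDGCN intrinsics now produce
addrspace(4) results; a minimal use (the kernel name is hypothetical, and the
pattern mirrors the updated tests below) looks like:

  declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

  define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %out) {
    %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
    ; Reinterpret the dispatch packet pointer and read its first dword.
    %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
    %val = load i32, i32 addrspace(4)* %bc
    store i32 %val, i32 addrspace(1)* %out
    ret void
  }

  attributes #0 = { nounwind readnone speculatable }
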
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Tue Feb 13 10:00:25 2018
@@ -222,7 +222,7 @@ struct AMDGPUAS {
MAX_COMMON_ADDRESS = 5,
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
- CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
+ CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
LOCAL_ADDRESS = 3, ///< Address space for local memory.
CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp Tue Feb 13 10:00:25 2018
@@ -61,7 +61,7 @@ AMDGPUAAResult::ASAliasRulesTy::ASAliasR
/* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
};
static const AliasResult ASAliasRulesGenIsZero[6][6] = {
- /* Flat Global Constant Group Region Private */
+ /* Flat Global Region Group Constant Private */
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
/* Constant */ {MayAlias, NoAlias , MayAlias, NoAlias , NoAlias, NoAlias},
@@ -72,9 +72,9 @@ AMDGPUAAResult::ASAliasRulesTy::ASAliasR
assert(AS.MAX_COMMON_ADDRESS <= 5);
if (AS.FLAT_ADDRESS == 0) {
assert(AS.GLOBAL_ADDRESS == 1 &&
- AS.REGION_ADDRESS == 4 &&
+ AS.REGION_ADDRESS == 2 &&
AS.LOCAL_ADDRESS == 3 &&
- AS.CONSTANT_ADDRESS == 2 &&
+ AS.CONSTANT_ADDRESS == 4 &&
AS.PRIVATE_ADDRESS == 5);
ASAliasRules = &ASAliasRulesGenIsZero;
} else {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp Tue Feb 13 10:00:25 2018
@@ -116,7 +116,7 @@ bool AMDGPUCallLowering::lowerFormalArgu
if (Info->hasKernargSegmentPtr()) {
unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
- const LLT P2 = LLT::pointer(2, 64);
+ const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
unsigned VReg = MRI.createGenericVirtualRegister(P2);
MRI.addLiveIn(InputPtrReg, VReg);
MIRBuilder.getMBB().addLiveIn(InputPtrReg);
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Tue Feb 13 10:00:25 2018
@@ -12,6 +12,7 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -29,8 +30,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
const LLT V2S16 = LLT::vector(2, 16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
- const LLT P1 = LLT::pointer(1, 64);
- const LLT P2 = LLT::pointer(2, 64);
+ const LLT P1 = LLT::pointer(AMDGPUAS::GLOBAL_ADDRESS, 64);
+ const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
setAction({G_ADD, S32}, Legal);
setAction({G_AND, S32}, Legal);
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Tue Feb 13 10:00:25 2018
@@ -266,7 +266,7 @@ static StringRef computeDataLayout(const
// 32-bit private, local, and region pointers. 64-bit global, constant and
// flat.
- return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32"
+ return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
}
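
A minimal module sketch using the new layout (only the data layout string is
taken from this change; the global and kernel names are hypothetical). Under
the new mapping, constant pointers (p4) are 64-bit and region pointers (p2)
are 32-bit:

  target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"

  ; A read-only table placed in the (64-bit, p4) constant address space.
  @lut = addrspace(4) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]

  define amdgpu_kernel void @read_lut(i32 addrspace(1)* %out, i32 %idx) {
    %gep = getelementptr [4 x i32], [4 x i32] addrspace(4)* @lut, i32 0, i32 %idx
    %v = load i32, i32 addrspace(4)* %gep
    store i32 %v, i32 addrspace(1)* %out
    ret void
  }
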
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Tue Feb 13 10:00:25 2018
@@ -929,7 +929,7 @@ AMDGPUAS getAMDGPUAS(Triple T) {
AMDGPUAS AS;
AS.FLAT_ADDRESS = 0;
AS.PRIVATE_ADDRESS = 5;
- AS.REGION_ADDRESS = 4;
+ AS.REGION_ADDRESS = 2;
return AS;
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir Tue Feb 13 10:00:25 2018
@@ -5,7 +5,7 @@
# REQUIRES: global-isel
--- |
- define amdgpu_kernel void @smrd_imm(i32 addrspace(2)* %const0) { ret void }
+ define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void }
...
---
@@ -91,50 +91,50 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1
- %0:sgpr(p2) = COPY $sgpr0_sgpr1
+ %0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4
- %2:sgpr(p2) = G_GEP %0, %1
+ %2:sgpr(p4) = G_GEP %0, %1
%3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0)
$sgpr0 = COPY %3
%4:sgpr(s64) = G_CONSTANT i64 1020
- %5:sgpr(p2) = G_GEP %0, %4
+ %5:sgpr(p4) = G_GEP %0, %4
%6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0)
$sgpr0 = COPY %6
%7:sgpr(s64) = G_CONSTANT i64 1024
- %8:sgpr(p2) = G_GEP %0, %7
+ %8:sgpr(p4) = G_GEP %0, %7
%9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0)
$sgpr0 = COPY %9
%10:sgpr(s64) = G_CONSTANT i64 1048572
- %11:sgpr(p2) = G_GEP %0, %10
+ %11:sgpr(p4) = G_GEP %0, %10
%12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0)
$sgpr0 = COPY %12
%13:sgpr(s64) = G_CONSTANT i64 1048576
- %14:sgpr(p2) = G_GEP %0, %13
+ %14:sgpr(p4) = G_GEP %0, %13
%15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0)
$sgpr0 = COPY %15
%16:sgpr(s64) = G_CONSTANT i64 17179869180
- %17:sgpr(p2) = G_GEP %0, %16
+ %17:sgpr(p4) = G_GEP %0, %16
%18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0)
$sgpr0 = COPY %18
%19:sgpr(s64) = G_CONSTANT i64 17179869184
- %20:sgpr(p2) = G_GEP %0, %19
+ %20:sgpr(p4) = G_GEP %0, %19
%21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0)
$sgpr0 = COPY %21
%22:sgpr(s64) = G_CONSTANT i64 4294967292
- %23:sgpr(p2) = G_GEP %0, %22
+ %23:sgpr(p4) = G_GEP %0, %22
%24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0)
$sgpr0 = COPY %24
%25:sgpr(s64) = G_CONSTANT i64 4294967296
- %26:sgpr(p2) = G_GEP %0, %25
+ %26:sgpr(p4) = G_GEP %0, %25
%27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0)
$sgpr0 = COPY %27
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll Tue Feb 13 10:00:25 2018
@@ -18,28 +18,28 @@ define amdgpu_vs void @test_f32(float %a
}
; CHECK-LABEL: name: test_ptr2_byval
-; CHECK: [[S01:%[0-9]+]]:_(p2) = COPY $sgpr0_sgpr1
+; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; CHECK: G_LOAD [[S01]]
-define amdgpu_vs void @test_ptr2_byval(i32 addrspace(2)* byval %arg0) {
- %tmp0 = load volatile i32, i32 addrspace(2)* %arg0
+define amdgpu_vs void @test_ptr2_byval(i32 addrspace(4)* byval %arg0) {
+ %tmp0 = load volatile i32, i32 addrspace(4)* %arg0
ret void
}
; CHECK-LABEL: name: test_ptr2_inreg
-; CHECK: [[S01:%[0-9]+]]:_(p2) = COPY $sgpr0_sgpr1
+; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; CHECK: G_LOAD [[S01]]
-define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(2)* inreg %arg0) {
- %tmp0 = load volatile i32, i32 addrspace(2)* %arg0
+define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
+ %tmp0 = load volatile i32, i32 addrspace(4)* %arg0
ret void
}
; CHECK-LABEL: name: test_sgpr_alignment0
; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0
-; CHECK: [[S23:%[0-9]+]]:_(p2) = COPY $sgpr2_sgpr3
+; CHECK: [[S23:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
; CHECK: G_LOAD [[S23]]
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]]
-define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(2)* inreg %arg1) {
- %tmp0 = load volatile i32, i32 addrspace(2)* %arg1
+define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) {
+ %tmp0 = load volatile i32, i32 addrspace(4)* %arg1
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir Tue Feb 13 10:00:25 2018
@@ -3,7 +3,7 @@
# REQUIRES: global-isel
--- |
- define amdgpu_kernel void @load_constant(i32 addrspace(2)* %ptr0) { ret void }
+ define amdgpu_kernel void @load_constant(i32 addrspace(4)* %ptr0) { ret void }
define amdgpu_kernel void @load_global_uniform(i32 addrspace(1)* %ptr1) {
%tmp0 = load i32, i32 addrspace(1)* %ptr1
ret void
@@ -30,7 +30,7 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1
- %0:_(p2) = COPY $sgpr0_sgpr1
+ %0:_(p4) = COPY $sgpr0_sgpr1
%1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr0)
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll Tue Feb 13 10:00:25 2018
@@ -9,10 +9,10 @@
; GCN-LABEL: {{^}}smrd0:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
-define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
- %1 = load i32, i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 1
+ %1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -21,10 +21,10 @@ entry:
; GCN-LABEL: {{^}}smrd1:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
-define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
- %1 = load i32, i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 255
+ %1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -36,10 +36,10 @@ entry:
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
-define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
- %1 = load i32, i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 256
+ %1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -51,10 +51,10 @@ entry:
; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
; TODO: Add VI checks
; XGCN: s_endpgm
-define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
- %1 = load i32, i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 4294967296 ; 2 ^ 32
+ %1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -65,10 +65,10 @@ entry:
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
-define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
- %1 = load i32, i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 262143
+ %1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -79,10 +79,10 @@ entry:
; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
-define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
- %1 = load i32, i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 262144
+ %1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll Tue Feb 13 10:00:25 2018
@@ -27,9 +27,9 @@ define amdgpu_kernel void @v_test_add_v2
; VI: s_add_i32
; VI: s_add_i32
-define amdgpu_kernel void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0, <2 x i16> addrspace(2)* %in1) #1 {
- %a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
- %b = load <2 x i16>, <2 x i16> addrspace(2)* %in1
+define amdgpu_kernel void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0, <2 x i16> addrspace(4)* %in1) #1 {
+ %a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
+ %b = load <2 x i16>, <2 x i16> addrspace(4)* %in1
%add = add <2 x i16> %a, %b
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
ret void
@@ -41,8 +41,8 @@ define amdgpu_kernel void @s_test_add_v2
; VI: s_add_i32
; VI: s_add_i32
-define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0) #1 {
- %a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
+define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0) #1 {
+ %a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
%add = add <2 x i16> %a, %a
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll Tue Feb 13 10:00:25 2018
@@ -100,8 +100,8 @@ define amdgpu_kernel void @use_global_to
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
-define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(2)* %ptr to i32*
+define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #0 {
+ %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
%ld = load volatile i32, i32* %stof
ret void
}
@@ -160,8 +160,8 @@ define amdgpu_kernel void @use_flat_to_g
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 {
- %ftos = addrspacecast i32* %ptr to i32 addrspace(2)*
- load volatile i32, i32 addrspace(2)* %ftos
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
+ load volatile i32, i32 addrspace(4)* %ftos
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgcn.bitcast.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgcn.bitcast.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgcn.bitcast.ll Tue Feb 13 10:00:25 2018
@@ -4,9 +4,9 @@
; This test just checks that the compiler doesn't crash.
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
-define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
+define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(4)* inreg) #0 {
entry:
- %1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
+ %1 = load <32 x i8>, <32 x i8> addrspace(4)* %0
%2 = bitcast <32 x i8> %1 to <8 x i32>
%3 = extractelement <8 x i32> %2, i32 1
%4 = icmp ne i32 %3, 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll Tue Feb 13 10:00:25 2018
@@ -48,12 +48,12 @@
; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
-; HSAOPT: [[DISPATCH_PTR:%[0-9]+]] = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
-; HSAOPT: [[CAST_DISPATCH_PTR:%[0-9]+]] = bitcast i8 addrspace(2)* [[DISPATCH_PTR]] to i32 addrspace(2)*
-; HSAOPT: [[GEP0:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(2)* [[CAST_DISPATCH_PTR]], i64 1
-; HSAOPT: [[LDXY:%[0-9]+]] = load i32, i32 addrspace(2)* [[GEP0]], align 4, !invariant.load !0
-; HSAOPT: [[GEP1:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(2)* [[CAST_DISPATCH_PTR]], i64 2
-; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(2)* [[GEP1]], align 4, !range !1, !invariant.load !0
+; HSAOPT: [[DISPATCH_PTR:%[0-9]+]] = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+; HSAOPT: [[CAST_DISPATCH_PTR:%[0-9]+]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
+; HSAOPT: [[GEP0:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(4)* [[CAST_DISPATCH_PTR]], i64 1
+; HSAOPT: [[LDXY:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP0]], align 4, !invariant.load !0
+; HSAOPT: [[GEP1:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(4)* [[CAST_DISPATCH_PTR]], i64 2
+; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP1]], align 4, !range !1, !invariant.load !0
; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16
; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !2
Modified: llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll Tue Feb 13 10:00:25 2018
@@ -8,10 +8,10 @@ declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0
-declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
-declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
-declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
-declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
; HSA: define void @use_workitem_id_x() #1 {
@@ -58,15 +58,15 @@ define void @use_workgroup_id_z() #1 {
; HSA: define void @use_dispatch_ptr() #7 {
define void @use_dispatch_ptr() #1 {
- %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
- store volatile i8 addrspace(2)* %dispatch.ptr, i8 addrspace(2)* addrspace(1)* undef
+ %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+ store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
; HSA: define void @use_queue_ptr() #8 {
define void @use_queue_ptr() #1 {
- %queue.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
- store volatile i8 addrspace(2)* %queue.ptr, i8 addrspace(2)* addrspace(1)* undef
+ %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
+ store volatile i8 addrspace(4)* %queue.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
@@ -186,22 +186,22 @@ define void @call_recursive_use_workitem
; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #8 {
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
- %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
+ store volatile i32 0, i32 addrspace(2)* %stof
ret void
}
; HSA: define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #12 {
define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
- %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
+ store volatile i32 0, i32 addrspace(2)* %stof
ret void
}
; HSA: define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #13 {
define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 {
- %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- store volatile i32 0, i32 addrspace(4)* %stof
+ %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
+ store volatile i32 0, i32 addrspace(2)* %stof
call void @func_indirect_use_queue_ptr()
ret void
}
@@ -226,8 +226,8 @@ define void @indirect_use_group_to_flat_
; HSA: define void @use_kernarg_segment_ptr() #14 {
define void @use_kernarg_segment_ptr() #1 {
- %kernarg.segment.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
- store volatile i8 addrspace(2)* %kernarg.segment.ptr, i8 addrspace(2)* addrspace(1)* undef
+ %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+ store volatile i8 addrspace(4)* %kernarg.segment.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
@@ -239,15 +239,15 @@ define void @func_indirect_use_kernarg_s
; HSA: define amdgpu_kernel void @kern_use_implicitarg_ptr() #15 {
define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
- %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
- store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
+ %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
; HSA: define void @use_implicitarg_ptr() #15 {
define void @use_implicitarg_ptr() #1 {
- %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
- store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
+ %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll Tue Feb 13 10:00:25 2018
@@ -8,9 +8,9 @@ declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0
-declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
-declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
-declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
@@ -149,27 +149,27 @@ define amdgpu_kernel void @use_all_worki
; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
- %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
- %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
- %val = load i32, i32 addrspace(2)* %bc
+ %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+ %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
+ %val = load i32, i32 addrspace(4)* %bc
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
- %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
- %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
- %val = load i32, i32 addrspace(2)* %bc
+ %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
+ %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
+ %val = load i32, i32 addrspace(4)* %bc
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
- %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
- %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
- %val = load i32, i32 addrspace(2)* %bc
+ %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+ %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
+ %val = load i32, i32 addrspace(4)* %bc
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
@@ -210,9 +210,9 @@ define amdgpu_kernel void @use_global_to
ret void
}
-; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
-define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
- %stof = addrspacecast i32 addrspace(2)* %ptr to i32*
+; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
+define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
+ %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
%ld = load volatile i32, i32* %stof
ret void
}
@@ -226,8 +226,8 @@ define amdgpu_kernel void @use_flat_to_g
; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
- %ftos = addrspacecast i32* %ptr to i32 addrspace(2)*
- %ld = load volatile i32, i32 addrspace(2)* %ftos
+ %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
+ %ld = load volatile i32, i32 addrspace(4)* %ftos
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll Tue Feb 13 10:00:25 2018
@@ -358,7 +358,7 @@ bb0:
br i1 %cmp0, label %bb2, label %bb1
bb1:
- %val = load volatile i32, i32 addrspace(2)* undef
+ %val = load volatile i32, i32 addrspace(4)* undef
%cmp1 = icmp eq i32 %val, 3
br i1 %cmp1, label %bb3, label %bb2
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll Tue Feb 13 10:00:25 2018
@@ -345,7 +345,7 @@ define amdgpu_kernel void @test_call_ext
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
- %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(2)* undef
+ %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
%val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
call void @external_void_func_v8i32(<8 x i32> %val)
ret void
@@ -359,7 +359,7 @@ define amdgpu_kernel void @test_call_ext
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
- %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(2)* undef
+ %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
%val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
call void @external_void_func_v16i32(<16 x i32> %val)
ret void
@@ -377,7 +377,7 @@ define amdgpu_kernel void @test_call_ext
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
- %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(2)* undef
+ %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
%val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
call void @external_void_func_v32i32(<32 x i32> %val)
ret void
@@ -405,7 +405,7 @@ define amdgpu_kernel void @test_call_ext
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
- %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(2)* undef
+ %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
%val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
%val1 = load i32, i32 addrspace(1)* undef
call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
@@ -430,7 +430,7 @@ define amdgpu_kernel void @test_call_ext
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
- %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(2)* undef
+ %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
%val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
ret void
@@ -516,7 +516,7 @@ define amdgpu_kernel void @test_call_ext
; GCN-LABEL: {{^}}test_call_external_void_func_v16i8:
define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
- %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(2)* undef
+ %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
%val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
call void @external_void_func_v16i8(<16 x i8> %val)
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll Tue Feb 13 10:00:25 2018
@@ -4,9 +4,9 @@
; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
define void @use_dispatch_ptr() #1 {
- %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
- %header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
- %value = load volatile i32, i32 addrspace(2)* %header_ptr
+ %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
+ %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
+ %value = load volatile i32, i32 addrspace(4)* %header_ptr
ret void
}
@@ -21,9 +21,9 @@ define amdgpu_kernel void @kern_indirect
; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
define void @use_queue_ptr() #1 {
- %queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
- %header_ptr = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
- %value = load volatile i32, i32 addrspace(2)* %header_ptr
+ %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
+ %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
+ %value = load volatile i32, i32 addrspace(4)* %header_ptr
ret void
}
@@ -62,9 +62,9 @@ define amdgpu_kernel void @kern_indirect
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
define void @use_kernarg_segment_ptr() #1 {
- %kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
- %header_ptr = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
- %value = load volatile i32, i32 addrspace(2)* %header_ptr
+ %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
+ %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
+ %value = load volatile i32, i32 addrspace(4)* %header_ptr
ret void
}
@@ -435,17 +435,17 @@ define void @use_every_sgpr_input() #1 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
- %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
- %dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
- %val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc
-
- %queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
- %queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
- %val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc
-
- %kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
- %kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
- %val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc
+ %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
+ %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
+ %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
+
+ %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
+ %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
+ %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
+
+ %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
+ %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
+ %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
%val3 = call i64 @llvm.amdgcn.dispatch.id()
call void asm sideeffect "; use $0", "s"(i64 %val3)
@@ -515,17 +515,17 @@ define void @func_use_every_sgpr_input_c
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
- %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
- %dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
- %val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc
-
- %queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
- %queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
- %val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc
-
- %kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
- %kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
- %val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc
+ %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
+ %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
+ %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
+
+ %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
+ %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
+ %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
+
+ %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
+ %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
+ %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
%val3 = call i64 @llvm.amdgcn.dispatch.id()
call void asm sideeffect "; use $0", "s"(i64 %val3)
@@ -573,17 +573,17 @@ define void @func_use_every_sgpr_input_c
store volatile i32 0, i32 addrspace(5)* %alloca
- %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
- %dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
- %val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc
-
- %queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
- %queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
- %val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc
-
- %kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
- %kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
- %val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc
+ %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
+ %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
+ %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
+
+ %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
+ %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
+ %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
+
+ %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
+ %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
+ %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
%val3 = call i64 @llvm.amdgcn.dispatch.id()
call void asm sideeffect "; use $0", "s"(i64 %val3)
@@ -603,10 +603,10 @@ define void @func_use_every_sgpr_input_c
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
-declare noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
-declare noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
+declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
+declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
-declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }
Modified: llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll Tue Feb 13 10:00:25 2018
@@ -87,12 +87,12 @@ define amdgpu_kernel void @test_sink_noo
entry:
%out.gep = getelementptr i32, i32* %out, i64 999999
%in.gep = getelementptr i32, i32* %in, i64 7
- %cast = addrspacecast i32* %in.gep to i32 addrspace(2)*
+ %cast = addrspacecast i32* %in.gep to i32 addrspace(4)*
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i32, i32 addrspace(2)* %cast
+ %tmp1 = load i32, i32 addrspace(4)* %cast
br label %endif
endif:
Modified: llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll Tue Feb 13 10:00:25 2018
@@ -268,23 +268,23 @@ done:
}
; OPT-LABEL: @test_sink_constant_small_offset_i32
-; OPT-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT-NOT: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
; GCN: s_or_b64 exec, exec
-define amdgpu_kernel void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+define amdgpu_kernel void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ %tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@@ -297,23 +297,23 @@ done:
}
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
-; OPT-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT-NOT: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
; GCN: s_or_b64 exec, exec
-define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 255
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ %tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@@ -326,9 +326,9 @@ done:
}
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
-; OPT-SI: getelementptr i32, i32 addrspace(2)*
-; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
-; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT-SI: getelementptr i32, i32 addrspace(4)*
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)*
+; OPT-VI-NOT: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
@@ -337,16 +337,16 @@ done:
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
-define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 256
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ %tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@@ -359,8 +359,8 @@ done:
}
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
-; OPT-SI: getelementptr i32, i32 addrspace(2)*
-; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT-SI: getelementptr i32, i32 addrspace(4)*
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
@@ -369,16 +369,16 @@ done:
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
-define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 4294967295
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ %tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@@ -391,7 +391,7 @@ done:
}
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
-; OPT: getelementptr i32, i32 addrspace(2)*
+; OPT: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
@@ -400,16 +400,16 @@ done:
; GCN: s_addc_u32
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
-define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 17179869181
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ %tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@@ -430,16 +430,16 @@ done:
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
; GCN: s_or_b64 exec, exec
-define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 262143
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ %tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@@ -452,9 +452,9 @@ done:
}
; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
-; OPT-SI: getelementptr i32, i32 addrspace(2)*
-; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
-; OPT-VI: getelementptr i32, i32 addrspace(2)*
+; OPT-SI: getelementptr i32, i32 addrspace(4)*
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)*
+; OPT-VI: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
@@ -468,16 +468,16 @@ done:
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
-define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 262144
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ %tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@@ -524,17 +524,17 @@ bb34:
; OPT: br i1 %tmp0,
; OPT: if:
; OPT: getelementptr i8, {{.*}} 4095
-define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
+define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
- %in.gep = getelementptr i8, i8 addrspace(2)* %in, i64 4095
+ %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
- %bitcast = bitcast i8 addrspace(2)* %in.gep to i32 addrspace(2)*
- %tmp1 = load i32, i32 addrspace(2)* %bitcast, align 1
+ %bitcast = bitcast i8 addrspace(4)* %in.gep to i32 addrspace(4)*
+ %tmp1 = load i32, i32 addrspace(4)* %bitcast, align 1
br label %endif
endif:
Modified: llvm/trunk/test/CodeGen/AMDGPU/early-if-convert-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/early-if-convert-cost.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/early-if-convert-cost.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/early-if-convert-cost.ll Tue Feb 13 10:00:25 2018
@@ -32,9 +32,9 @@ endif:
; GCN: v_add_f64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
-define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(2)* %in) #0 {
+define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(4)* %in) #0 {
entry:
- %v = load double, double addrspace(2)* %in
+ %v = load double, double addrspace(4)* %in
%cc = fcmp oeq double %v, 1.000000e+00
br i1 %cc, label %if, label %endif
Modified: llvm/trunk/test/CodeGen/AMDGPU/early-if-convert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/early-if-convert.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/early-if-convert.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/early-if-convert.ll Tue Feb 13 10:00:25 2018
@@ -187,9 +187,9 @@ endif:
; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
-define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle(i32 addrspace(1)* %out, i32 addrspace(2)* %in, float %cnd) #0 {
+define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle(i32 addrspace(1)* %out, i32 addrspace(4)* %in, float %cnd) #0 {
entry:
- %v = load i32, i32 addrspace(2)* %in
+ %v = load i32, i32 addrspace(4)* %in
%cc = fcmp oeq float %cnd, 1.000000e+00
br i1 %cc, label %if, label %endif
@@ -206,9 +206,9 @@ endif:
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_constant_load:
; GCN: v_cndmask_b32
-define amdgpu_kernel void @test_vccnz_ifcvt_triangle_constant_load(float addrspace(1)* %out, float addrspace(2)* %in) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_triangle_constant_load(float addrspace(1)* %out, float addrspace(4)* %in) #0 {
entry:
- %v = load float, float addrspace(2)* %in
+ %v = load float, float addrspace(4)* %in
%cc = fcmp oeq float %v, 1.000000e+00
br i1 %cc, label %if, label %endif
@@ -248,9 +248,9 @@ endif:
; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], [[VAL]]
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[ADD]], [[VAL]]
-define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(2)* %in, i32 %cond) #0 {
+define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(4)* %in, i32 %cond) #0 {
entry:
- %v = load i32, i32 addrspace(2)* %in
+ %v = load i32, i32 addrspace(4)* %in
%cc = icmp eq i32 %cond, 1
br i1 %cc, label %if, label %endif
@@ -295,9 +295,9 @@ endif:
; GCN: s_addc_u32
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle64(i64 addrspace(2)* %in, i32 %cond) #0 {
+define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle64(i64 addrspace(4)* %in, i32 %cond) #0 {
entry:
- %v = load i64, i64 addrspace(2)* %in
+ %v = load i64, i64 addrspace(4)* %in
%cc = icmp eq i32 %cond, 1
br i1 %cc, label %if, label %endif
@@ -320,9 +320,9 @@ endif:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle96(<3 x i32> addrspace(2)* %in, i32 %cond) #0 {
+define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle96(<3 x i32> addrspace(4)* %in, i32 %cond) #0 {
entry:
- %v = load <3 x i32>, <3 x i32> addrspace(2)* %in
+ %v = load <3 x i32>, <3 x i32> addrspace(4)* %in
%cc = icmp eq i32 %cond, 1
br i1 %cc, label %if, label %endif
@@ -345,9 +345,9 @@ endif:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle128(<4 x i32> addrspace(2)* %in, i32 %cond) #0 {
+define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle128(<4 x i32> addrspace(4)* %in, i32 %cond) #0 {
entry:
- %v = load <4 x i32>, <4 x i32> addrspace(2)* %in
+ %v = load <4 x i32>, <4 x i32> addrspace(4)* %in
%cc = icmp eq i32 %cond, 1
br i1 %cc, label %if, label %endif
Modified: llvm/trunk/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll Tue Feb 13 10:00:25 2018
@@ -8,8 +8,8 @@
; GCN-DAG: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
; GCN-DAG: buffer_store_short [[VELT0]]
; GCN-DAG: buffer_store_short [[VELT1]]
-define amdgpu_kernel void @extract_vector_elt_v2f16(half addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr) #0 {
- %vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @extract_vector_elt_v2f16(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr) #0 {
+ %vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
%p0 = extractelement <2 x half> %vec, i32 0
%p1 = extractelement <2 x half> %vec, i32 1
%out1 = getelementptr half, half addrspace(1)* %out, i32 10
@@ -26,8 +26,8 @@ define amdgpu_kernel void @extract_vecto
; GCN: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
; GCN: buffer_store_short [[VELT1]]
; GCN: ScratchSize: 0
-define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(half addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr, i32 %idx) #0 {
- %vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr, i32 %idx) #0 {
+ %vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
%elt = extractelement <2 x half> %vec, i32 %idx
store half %elt, half addrspace(1)* %out, align 2
ret void
@@ -45,12 +45,12 @@ define amdgpu_kernel void @extract_vecto
; SI: buffer_store_short [[ELT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[ELT]]
; GCN: ScratchSize: 0{{$}}
-define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_vgpr(half addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
+define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_vgpr(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %idx.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
- %vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
+ %vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
%idx = load i32, i32 addrspace(1)* %gep
%elt = extractelement <2 x half> %vec, i32 %idx
store half %elt, half addrspace(1)* %out.gep, align 2
Modified: llvm/trunk/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll Tue Feb 13 10:00:25 2018
@@ -9,8 +9,8 @@
; GCN-DAG: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
; GCN-DAG: buffer_store_short [[VELT0]]
; GCN-DAG: buffer_store_short [[VELT1]]
-define amdgpu_kernel void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr) #0 {
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr) #0 {
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%p0 = extractelement <2 x i16> %vec, i32 0
%p1 = extractelement <2 x i16> %vec, i32 1
%out1 = getelementptr i16, i16 addrspace(1)* %out, i32 10
@@ -27,8 +27,8 @@ define amdgpu_kernel void @extract_vecto
; GCN: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
; GCN: buffer_store_short [[VELT1]]
; GCN: ScratchSize: 0
-define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_sgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %idx) #0 {
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_sgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %idx) #0 {
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt = extractelement <2 x i16> %vec, i32 %idx
store i16 %elt, i16 addrspace(1)* %out, align 2
ret void
@@ -45,13 +45,13 @@ define amdgpu_kernel void @extract_vecto
; SI: buffer_store_short [[ELT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[ELT]]
; GCN: ScratchSize: 0{{$}}
-define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_vgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
+define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_vgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %idx.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
%idx = load volatile i32, i32 addrspace(1)* %gep
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt = extractelement <2 x i16> %vec, i32 %idx
store i16 %elt, i16 addrspace(1)* %out.gep, align 2
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/fence-barrier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fence-barrier.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fence-barrier.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fence-barrier.ll Tue Feb 13 10:00:25 2018
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
-declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
-declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workgroup.id.x()
declare void @llvm.amdgcn.s.barrier()
@@ -34,19 +34,19 @@ define amdgpu_kernel void @test_local(i3
fence syncscope("workgroup") acquire
%8 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_local.temp, i64 0, i64 0), align 4
%9 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
- %10 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+ %10 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%11 = call i32 @llvm.amdgcn.workitem.id.x()
%12 = call i32 @llvm.amdgcn.workgroup.id.x()
- %13 = getelementptr inbounds i8, i8 addrspace(2)* %10, i64 4
- %14 = bitcast i8 addrspace(2)* %13 to i16 addrspace(2)*
- %15 = load i16, i16 addrspace(2)* %14, align 4
+ %13 = getelementptr inbounds i8, i8 addrspace(4)* %10, i64 4
+ %14 = bitcast i8 addrspace(4)* %13 to i16 addrspace(4)*
+ %15 = load i16, i16 addrspace(4)* %14, align 4
%16 = zext i16 %15 to i32
%17 = mul i32 %12, %16
%18 = add i32 %17, %11
- %19 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %19 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%20 = zext i32 %18 to i64
- %21 = bitcast i8 addrspace(2)* %19 to i64 addrspace(2)*
- %22 = load i64, i64 addrspace(2)* %21, align 8
+ %21 = bitcast i8 addrspace(4)* %19 to i64 addrspace(4)*
+ %22 = load i64, i64 addrspace(4)* %21, align 8
%23 = add i64 %22, %20
%24 = getelementptr inbounds i32, i32 addrspace(1)* %9, i64 %23
store i32 %8, i32 addrspace(1)* %24, align 4
@@ -68,56 +68,56 @@ define amdgpu_kernel void @test_global(i
; <label>:4: ; preds = %58, %1
%5 = load i32, i32 addrspace(5)* %3, align 4
%6 = sext i32 %5 to i64
- %7 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+ %7 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%8 = call i32 @llvm.amdgcn.workitem.id.x()
%9 = call i32 @llvm.amdgcn.workgroup.id.x()
- %10 = getelementptr inbounds i8, i8 addrspace(2)* %7, i64 4
- %11 = bitcast i8 addrspace(2)* %10 to i16 addrspace(2)*
- %12 = load i16, i16 addrspace(2)* %11, align 4
+ %10 = getelementptr inbounds i8, i8 addrspace(4)* %7, i64 4
+ %11 = bitcast i8 addrspace(4)* %10 to i16 addrspace(4)*
+ %12 = load i16, i16 addrspace(4)* %11, align 4
%13 = zext i16 %12 to i32
%14 = mul i32 %9, %13
%15 = add i32 %14, %8
- %16 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %16 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%17 = zext i32 %15 to i64
- %18 = bitcast i8 addrspace(2)* %16 to i64 addrspace(2)*
- %19 = load i64, i64 addrspace(2)* %18, align 8
+ %18 = bitcast i8 addrspace(4)* %16 to i64 addrspace(4)*
+ %19 = load i64, i64 addrspace(4)* %18, align 8
%20 = add i64 %19, %17
%21 = icmp ult i64 %6, %20
br i1 %21, label %22, label %61
; <label>:22: ; preds = %4
- %23 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+ %23 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%24 = call i32 @llvm.amdgcn.workitem.id.x()
%25 = call i32 @llvm.amdgcn.workgroup.id.x()
- %26 = getelementptr inbounds i8, i8 addrspace(2)* %23, i64 4
- %27 = bitcast i8 addrspace(2)* %26 to i16 addrspace(2)*
- %28 = load i16, i16 addrspace(2)* %27, align 4
+ %26 = getelementptr inbounds i8, i8 addrspace(4)* %23, i64 4
+ %27 = bitcast i8 addrspace(4)* %26 to i16 addrspace(4)*
+ %28 = load i16, i16 addrspace(4)* %27, align 4
%29 = zext i16 %28 to i32
%30 = mul i32 %25, %29
%31 = add i32 %30, %24
- %32 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %32 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%33 = zext i32 %31 to i64
- %34 = bitcast i8 addrspace(2)* %32 to i64 addrspace(2)*
- %35 = load i64, i64 addrspace(2)* %34, align 8
+ %34 = bitcast i8 addrspace(4)* %32 to i64 addrspace(4)*
+ %35 = load i64, i64 addrspace(4)* %34, align 8
%36 = add i64 %35, %33
%37 = add i64 %36, 2184
%38 = trunc i64 %37 to i32
%39 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
%40 = load i32, i32 addrspace(5)* %3, align 4
%41 = sext i32 %40 to i64
- %42 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+ %42 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%43 = call i32 @llvm.amdgcn.workitem.id.x()
%44 = call i32 @llvm.amdgcn.workgroup.id.x()
- %45 = getelementptr inbounds i8, i8 addrspace(2)* %42, i64 4
- %46 = bitcast i8 addrspace(2)* %45 to i16 addrspace(2)*
- %47 = load i16, i16 addrspace(2)* %46, align 4
+ %45 = getelementptr inbounds i8, i8 addrspace(4)* %42, i64 4
+ %46 = bitcast i8 addrspace(4)* %45 to i16 addrspace(4)*
+ %47 = load i16, i16 addrspace(4)* %46, align 4
%48 = zext i16 %47 to i32
%49 = mul i32 %44, %48
%50 = add i32 %49, %43
- %51 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %51 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%52 = zext i32 %50 to i64
- %53 = bitcast i8 addrspace(2)* %51 to i64 addrspace(2)*
- %54 = load i64, i64 addrspace(2)* %53, align 8
+ %53 = bitcast i8 addrspace(4)* %51 to i64 addrspace(4)*
+ %54 = load i64, i64 addrspace(4)* %53, align 8
%55 = add i64 %54, %52
%56 = add i64 %41, %55
%57 = getelementptr inbounds i32, i32 addrspace(1)* %39, i64 %56
@@ -147,19 +147,19 @@ define amdgpu_kernel void @test_global_l
%2 = alloca i32 addrspace(1)*, align 4, addrspace(5)
store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4
%3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
- %4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+ %4 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%5 = call i32 @llvm.amdgcn.workitem.id.x()
%6 = call i32 @llvm.amdgcn.workgroup.id.x()
- %7 = getelementptr inbounds i8, i8 addrspace(2)* %4, i64 4
- %8 = bitcast i8 addrspace(2)* %7 to i16 addrspace(2)*
- %9 = load i16, i16 addrspace(2)* %8, align 4
+ %7 = getelementptr inbounds i8, i8 addrspace(4)* %4, i64 4
+ %8 = bitcast i8 addrspace(4)* %7 to i16 addrspace(4)*
+ %9 = load i16, i16 addrspace(4)* %8, align 4
%10 = zext i16 %9 to i32
%11 = mul i32 %6, %10
%12 = add i32 %11, %5
- %13 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %13 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%14 = zext i32 %12 to i64
- %15 = bitcast i8 addrspace(2)* %13 to i64 addrspace(2)*
- %16 = load i64, i64 addrspace(2)* %15, align 8
+ %15 = bitcast i8 addrspace(4)* %13 to i64 addrspace(4)*
+ %16 = load i64, i64 addrspace(4)* %15, align 8
%17 = add i64 %16, %14
%18 = getelementptr inbounds i32, i32 addrspace(1)* %3, i64 %17
store i32 1, i32 addrspace(1)* %18, align 4
@@ -178,19 +178,19 @@ define amdgpu_kernel void @test_global_l
fence syncscope("workgroup") acquire
%24 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_global_local.temp, i64 0, i64 0), align 4
%25 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
- %26 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+ %26 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%27 = call i32 @llvm.amdgcn.workitem.id.x()
%28 = call i32 @llvm.amdgcn.workgroup.id.x()
- %29 = getelementptr inbounds i8, i8 addrspace(2)* %26, i64 4
- %30 = bitcast i8 addrspace(2)* %29 to i16 addrspace(2)*
- %31 = load i16, i16 addrspace(2)* %30, align 4
+ %29 = getelementptr inbounds i8, i8 addrspace(4)* %26, i64 4
+ %30 = bitcast i8 addrspace(4)* %29 to i16 addrspace(4)*
+ %31 = load i16, i16 addrspace(4)* %30, align 4
%32 = zext i16 %31 to i32
%33 = mul i32 %28, %32
%34 = add i32 %33, %27
- %35 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %35 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%36 = zext i32 %34 to i64
- %37 = bitcast i8 addrspace(2)* %35 to i64 addrspace(2)*
- %38 = load i64, i64 addrspace(2)* %37, align 8
+ %37 = bitcast i8 addrspace(4)* %35 to i64 addrspace(4)*
+ %38 = load i64, i64 addrspace(4)* %37, align 8
%39 = add i64 %38, %36
%40 = getelementptr inbounds i32, i32 addrspace(1)* %25, i64 %39
store i32 %24, i32 addrspace(1)* %40, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll Tue Feb 13 10:00:25 2018
@@ -164,7 +164,7 @@ define <5 x i32> @v5i32_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i32> @v8i32_func_void() #0 {
- %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
%val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
ret <8 x i32> %val
}
@@ -177,7 +177,7 @@ define <8 x i32> @v8i32_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i32> @v16i32_func_void() #0 {
- %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
%val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
ret <16 x i32> %val
}
@@ -194,7 +194,7 @@ define <16 x i32> @v16i32_func_void() #0
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <32 x i32> @v32i32_func_void() #0 {
- %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
%val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
ret <32 x i32> %val
}
@@ -214,7 +214,7 @@ define <2 x i64> @v2i64_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i64> @v3i64_func_void() #0 {
- %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
%val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
ret <3 x i64> %val
}
@@ -225,7 +225,7 @@ define <3 x i64> @v3i64_func_void() #0 {
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i64> @v4i64_func_void() #0 {
- %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
%val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
ret <4 x i64> %val
}
@@ -237,7 +237,7 @@ define <4 x i64> @v4i64_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i64> @v5i64_func_void() #0 {
- %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
%val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
ret <5 x i64> %val
}
@@ -250,7 +250,7 @@ define <5 x i64> @v5i64_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i64> @v8i64_func_void() #0 {
- %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
%val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
ret <8 x i64> %val
}
@@ -267,7 +267,7 @@ define <8 x i64> @v8i64_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i64> @v16i64_func_void() #0 {
- %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
%val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
ret <16 x i64> %val
}
@@ -309,7 +309,7 @@ define <4 x i16> @v4i16_func_void() #0 {
; GFX9: v_lshrrev_b32_e32 v1, 16, v0
; GCN: s_setpc_b64
define <5 x i16> @v5i16_func_void() #0 {
- %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
%val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
ret <5 x i16> %val
}
@@ -319,7 +319,7 @@ define <5 x i16> @v5i16_func_void() #0 {
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <8 x i16> @v8i16_func_void() #0 {
- %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
%val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
ret <8 x i16> %val
}
@@ -330,7 +330,7 @@ define <8 x i16> @v8i16_func_void() #0 {
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <16 x i16> @v16i16_func_void() #0 {
- %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
%val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
ret <16 x i16> %val
}
@@ -342,7 +342,7 @@ define <16 x i16> @v16i16_func_void() #0
; GCN-DAG: v14
; GCN-DAG: v15
define <16 x i8> @v16i8_func_void() #0 {
- %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
%val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
ret <16 x i8> %val
}
@@ -356,7 +356,7 @@ define <16 x i8> @v16i8_func_void() #0 {
; GFX89-DAG: v_lshrrev_b16_e32 v1, 8, v0
; GCN: s_setpc_b64
define <4 x i8> @v4i8_func_void() #0 {
- %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
%val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
ret <4 x i8> %val
}
@@ -427,7 +427,7 @@ define void @void_func_sret_struct_i8_i3
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <33 x i32> @v33i32_func_void() #0 {
- %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
%val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
ret <33 x i32> %val
}
@@ -469,7 +469,7 @@ define <33 x i32> @v33i32_func_void() #0
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
- %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
%val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
ret { <32 x i32>, i32 }%val
}
@@ -511,7 +511,7 @@ define { <32 x i32>, i32 } @struct_v32i3
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
- %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(2)* undef
+ %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
%val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
ret { i32, <32 x i32> }%val
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/global-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-constant.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-constant.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-constant.ll Tue Feb 13 10:00:25 2018
@@ -1,9 +1,9 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOHSA %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA %s
-@private1 = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
-@private2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0]
-@available_externally = available_externally addrspace(2) global [256 x i32] zeroinitializer
+@private1 = private unnamed_addr addrspace(4) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
+@private2 = private unnamed_addr addrspace(4) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0]
+@available_externally = available_externally addrspace(4) global [256 x i32] zeroinitializer
; GCN-LABEL: {{^}}private_test:
; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
@@ -27,11 +27,11 @@
; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+4
define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
- %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @private1, i32 0, i32 %index
- %val = load float, float addrspace(2)* %ptr
+ %ptr = getelementptr [4 x float], [4 x float] addrspace(4) * @private1, i32 0, i32 %index
+ %val = load float, float addrspace(4)* %ptr
store volatile float %val, float addrspace(1)* %out
- %ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @private2, i32 0, i32 %index
- %val2 = load float, float addrspace(2)* %ptr2
+ %ptr2 = getelementptr [4 x float], [4 x float] addrspace(4) * @private2, i32 0, i32 %index
+ %val2 = load float, float addrspace(4)* %ptr2
store volatile float %val2, float addrspace(1)* %out
ret void
}
@@ -41,8 +41,8 @@ define amdgpu_kernel void @private_test(
; HSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4
; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+4
define amdgpu_kernel void @available_externally_test(i32 addrspace(1)* %out) {
- %ptr = getelementptr [256 x i32], [256 x i32] addrspace(2)* @available_externally, i32 0, i32 1
- %val = load i32, i32 addrspace(2)* %ptr
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(4)* @available_externally, i32 0, i32 1
+ %val = load i32, i32 addrspace(4)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/gv-const-addrspace.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/gv-const-addrspace.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/gv-const-addrspace.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/gv-const-addrspace.ll Tue Feb 13 10:00:25 2018
@@ -4,9 +4,9 @@
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
+@b = internal addrspace(4) constant [1 x i16] [ i16 7 ], align 2
-@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
+@float_gv = internal unnamed_addr addrspace(4) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
; FUNC-LABEL: {{^}}float:
; GCN: s_load_dword
@@ -17,13 +17,13 @@
; EG-NOT: MOV
define amdgpu_kernel void @float(float addrspace(1)* %out, i32 %index) {
entry:
- %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
- %1 = load float, float addrspace(2)* %0
+ %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(4)* @float_gv, i32 0, i32 %index
+ %1 = load float, float addrspace(4)* %0
store float %1, float addrspace(1)* %out
ret void
}
-@i32_gv = internal unnamed_addr addrspace(2) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
+@i32_gv = internal unnamed_addr addrspace(4) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
; FUNC-LABEL: {{^}}i32:
@@ -35,8 +35,8 @@ entry:
; EG-NOT: MOV
define amdgpu_kernel void @i32(i32 addrspace(1)* %out, i32 %index) {
entry:
- %0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
- %1 = load i32, i32 addrspace(2)* %0
+ %0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(4)* @i32_gv, i32 0, i32 %index
+ %1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -44,7 +44,7 @@ entry:
%struct.foo = type { float, [5 x i32] }
-@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
+@struct_foo_gv = internal unnamed_addr addrspace(4) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
; FUNC-LABEL: {{^}}struct_foo_gv_load:
; GCN: s_load_dword
@@ -54,13 +54,13 @@ entry:
; EG-NOT: MOVA_INT
; EG-NOT: MOV
define amdgpu_kernel void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
- %gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
- %load = load i32, i32 addrspace(2)* %gep, align 4
+ %gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(4)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
+ %load = load i32, i32 addrspace(4)* %gep, align 4
store i32 %load, i32 addrspace(1)* %out, align 4
ret void
}
-@array_v1_gv = internal addrspace(2) constant [4 x <1 x i32>] [ <1 x i32> <i32 1>,
+@array_v1_gv = internal addrspace(4) constant [4 x <1 x i32>] [ <1 x i32> <i32 1>,
<1 x i32> <i32 2>,
<1 x i32> <i32 3>,
<1 x i32> <i32 4> ]
@@ -73,8 +73,8 @@ define amdgpu_kernel void @struct_foo_gv
; EG-NOT: MOVA_INT
; EG-NOT: MOV
define amdgpu_kernel void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
- %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
- %load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4
+ %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(4)* @array_v1_gv, i32 0, i32 %index
+ %load = load <1 x i32>, <1 x i32> addrspace(4)* %gep, align 4
store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4
ret void
}
@@ -90,8 +90,8 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
- %2 = load float, float addrspace(2)* %1
+ %1 = getelementptr inbounds [5 x float], [5 x float] addrspace(4)* @float_gv, i32 0, i32 %index
+ %2 = load float, float addrspace(4)* %1
store float %2, float addrspace(1)* %out
br label %endif
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-func-align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-func-align.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-func-align.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-func-align.ll Tue Feb 13 10:00:25 2018
@@ -10,9 +10,9 @@
; HSA: .globl simple_align16
; HSA: .p2align 5
-define void @simple_align16(i32 addrspace(1)* addrspace(2)* %ptr.out) align 32 {
+define void @simple_align16(i32 addrspace(1)* addrspace(4)* %ptr.out) align 32 {
entry:
- %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
+ %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out
store i32 0, i32 addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll Tue Feb 13 10:00:25 2018
@@ -51,9 +51,9 @@
; HSA: .size simple, .Lfunc_end0-simple
; HSA: ; Function info:
; HSA-NOT: COMPUTE_PGM_RSRC2
-define void @simple(i32 addrspace(1)* addrspace(2)* %ptr.out) {
+define void @simple(i32 addrspace(1)* addrspace(4)* %ptr.out) {
entry:
- %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
+ %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out
store i32 0, i32 addrspace(1)* %out
ret void
}
@@ -61,9 +61,9 @@ entry:
; Ignore explicit alignment that is too low.
; HSA: .globl simple_align2
; HSA: .p2align 2
-define void @simple_align2(i32 addrspace(1)* addrspace(2)* %ptr.out) align 2 {
+define void @simple_align2(i32 addrspace(1)* addrspace(4)* %ptr.out) align 2 {
entry:
- %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
+ %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out
store i32 0, i32 addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll Tue Feb 13 10:00:25 2018
@@ -581,7 +581,7 @@ define amdgpu_kernel void @test_multi_ar
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
- i32 addrspace(2)* %c,
+ i32 addrspace(4)* %c,
i32 addrspace(3)* %l)
!kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
!kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll Tue Feb 13 10:00:25 2018
@@ -20,21 +20,21 @@ define dllexport amdgpu_cs void @_amdgpu
%.0.vec.insert = insertelement <2 x i32> undef, i32 %arg2, i32 0
%.4.vec.insert = shufflevector <2 x i32> %.0.vec.insert, <2 x i32> %tmp6, <2 x i32> <i32 0, i32 3>
%tmp7 = bitcast <2 x i32> %.4.vec.insert to i64
- %tmp8 = inttoptr i64 %tmp7 to [4294967295 x i8] addrspace(2)*
+ %tmp8 = inttoptr i64 %tmp7 to [4294967295 x i8] addrspace(4)*
%tmp9 = add <3 x i32> %arg3, %arg5
- %tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(2)* %tmp8, i64 0, i64 32
- %tmp11 = bitcast i8 addrspace(2)* %tmp10 to <8 x i32> addrspace(2)*, !amdgpu.uniform !0
- %tmp12 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp11, align 16
+ %tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 32
+ %tmp11 = bitcast i8 addrspace(4)* %tmp10 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
+ %tmp12 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp11, align 16
%tmp13 = shufflevector <3 x i32> %tmp9, <3 x i32> undef, <2 x i32> <i32 0, i32 1>
%tmp14 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp12, i32 15, i1 false, i1 false, i1 false, i1 false) #0
- %tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(2)*
- %tmp16 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp15, align 16
+ %tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(4)*
+ %tmp16 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp14, <2 x i32> %tmp13, <8 x i32> %tmp16, i32 15, i1 false, i1 false, i1 false, i1 false) #0
- %tmp17 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp15, align 16
+ %tmp17 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
%tmp18 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp17, i32 15, i1 false, i1 false, i1 false, i1 false) #0
- %tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(2)* %tmp8, i64 0, i64 64
- %tmp20 = bitcast i8 addrspace(2)* %tmp19 to <8 x i32> addrspace(2)*, !amdgpu.uniform !0
- %tmp21 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp20, align 16
+ %tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 64
+ %tmp20 = bitcast i8 addrspace(4)* %tmp19 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
+ %tmp21 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp20, align 16
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp18, <2 x i32> %tmp13, <8 x i32> %tmp21, i32 15, i1 false, i1 false, i1 false, i1 false) #0
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll Tue Feb 13 10:00:25 2018
@@ -10,8 +10,8 @@
; GFX9-NOT: lshr
; GFX9: s_pack_lh_b32_b16 s{{[0-9]+}}, 0x3e7, [[VEC]]
-define amdgpu_kernel void @s_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr) #0 {
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @s_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr) #0 {
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x i16> %vec, i16 999, i32 0
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
ret void
@@ -28,8 +28,8 @@ define amdgpu_kernel void @s_inserteleme
; GFX9-NOT: [[ELT0]]
; GFX9-NOT: [[VEC]]
; GFX9: s_pack_lh_b32_b16 s{{[0-9]+}}, [[ELT0]], [[VEC]]
-define amdgpu_kernel void @s_insertelement_v2i16_0_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i16 %elt) #0 {
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @s_insertelement_v2i16_0_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i16 %elt) #0 {
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
ret void
@@ -48,8 +48,8 @@ define amdgpu_kernel void @s_inserteleme
; GFX9: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], 16
; GFX9-DAG: s_pack_ll_b32_b16 s{{[0-9]+}}, [[ELT0]], [[ELT1]]
; GFX9-DAG: ; use [[ELT1]]
-define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i16 %elt) #0 {
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i16 %elt) #0 {
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt1 = extractelement <2 x i16> %vec, i32 1
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
@@ -68,8 +68,8 @@ define amdgpu_kernel void @s_inserteleme
; GFX9-NOT: [[ELT0]]
; GFX9-NOT: [[VEC]]
; GFX9: s_pack_hh_b32_b16 s{{[0-9]+}}, [[ELT_ARG]], [[VEC]]
-define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %elt.arg) #0 {
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %elt.arg) #0 {
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt.hi = lshr i32 %elt.arg, 16
%elt = trunc i32 %elt.hi to i16
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
@@ -88,8 +88,8 @@ define amdgpu_kernel void @s_inserteleme
; GFX9: s_lshr_b32 [[ELT1:s[0-9]+]], [[ELT_ARG]], 16
; GFX9: s_pack_lh_b32_b16 s{{[0-9]+}}, [[ELT1]], [[VEC]]
; GFX9: ; use [[ELT1]]
-define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %elt.arg) #0 {
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %elt.arg) #0 {
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt.hi = lshr i32 %elt.arg, 16
%elt = trunc i32 %elt.hi to i16
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
@@ -113,8 +113,8 @@ define amdgpu_kernel void @s_inserteleme
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[ELT_HI]], [[VEC_HI]]
; GFX9: ; use [[ELT_HI]]
; GFX9: ; use [[VEC_HI]]
-define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_both_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %elt.arg) #0 {
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_both_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %elt.arg) #0 {
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt.hi = lshr i32 %elt.arg, 16
%elt = trunc i32 %elt.hi to i16
%vec.hi = extractelement <2 x i16> %vec, i32 1
@@ -137,8 +137,8 @@ define amdgpu_kernel void @s_inserteleme
; CIVI: s_or_b32 [[INS:s[0-9]+]], [[ELT0]], 0x3e70000
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[VEC]], 0x3e7
-define amdgpu_kernel void @s_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr) #0 {
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @s_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr) #0 {
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x i16> %vec, i16 999, i32 1
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
ret void
@@ -153,8 +153,8 @@ define amdgpu_kernel void @s_inserteleme
; GCN-NOT: shlr
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[VEC]], [[ELT1]]
-define amdgpu_kernel void @s_insertelement_v2i16_1_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i16 %elt) #0 {
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @s_insertelement_v2i16_1_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i16 %elt) #0 {
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 1
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
ret void
@@ -167,8 +167,8 @@ define amdgpu_kernel void @s_inserteleme
; GFX9: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], 16
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, 0x4500, [[ELT1]]
-define amdgpu_kernel void @s_insertelement_v2f16_0(<2 x half> addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr) #0 {
- %vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @s_insertelement_v2f16_0(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr) #0 {
+ %vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x half> %vec, half 5.000000e+00, i32 0
store <2 x half> %vecins, <2 x half> addrspace(1)* %out
ret void
@@ -182,8 +182,8 @@ define amdgpu_kernel void @s_inserteleme
; CIVI: s_or_b32 [[INS:s[0-9]+]], [[ELT0]], 0x45000000
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[VEC]], 0x4500
-define amdgpu_kernel void @s_insertelement_v2f16_1(<2 x half> addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr) #0 {
- %vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @s_insertelement_v2f16_1(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr) #0 {
+ %vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x half> %vec, half 5.000000e+00, i32 1
store <2 x half> %vecins, <2 x half> addrspace(1)* %out
ret void
@@ -399,9 +399,9 @@ define amdgpu_kernel void @v_inserteleme
; GCN-DAG: s_lshl_b32 [[MASK:s[0-9]+]], 0xffff, [[SCALED_IDX]]
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VVEC]]
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @s_insertelement_v2i16_dynamic(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 addrspace(2)* %idx.ptr) #0 {
- %idx = load volatile i32, i32 addrspace(2)* %idx.ptr
- %vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
+define amdgpu_kernel void @s_insertelement_v2i16_dynamic(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 addrspace(4)* %idx.ptr) #0 {
+ %idx = load volatile i32, i32 addrspace(4)* %idx.ptr
+ %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x i16> %vec, i16 999, i32 %idx
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll Tue Feb 13 10:00:25 2018
@@ -22,8 +22,8 @@ define amdgpu_kernel void @test_merge_st
; GCN: s_load_dwordx2 s{{\[}}[[SPTR_LO:[0-9]+]]:[[SPTR_HI:[0-9]+]]{{\]}}
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
; GCN: buffer_store_dword [[K]], off, s{{\[}}[[SPTR_LO]]:
-define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_pointer_load(i16 addrspace(1)* addrspace(2)* dereferenceable(4096) nonnull %in) #0 {
- %ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(2)* %in, !invariant.load !0
+define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_pointer_load(i16 addrspace(1)* addrspace(4)* dereferenceable(4096) nonnull %in) #0 {
+ %ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(4)* %in, !invariant.load !0
%ptr.1 = getelementptr i16, i16 addrspace(1)* %ptr, i64 1
store i16 123, i16 addrspace(1)* %ptr, align 4
store i16 456, i16 addrspace(1)* %ptr.1
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll Tue Feb 13 10:00:25 2018
@@ -14,10 +14,10 @@
; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding
; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc
-define amdgpu_vs void @main([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <4 x i32>] addrspace(2)* byval %arg3, [17 x <4 x i32>] addrspace(2)* inreg %arg4, [17 x <4 x i32>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
+define amdgpu_vs void @main([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <32 x i8>] addrspace(4)* byval %arg2, [2 x <4 x i32>] addrspace(4)* byval %arg3, [17 x <4 x i32>] addrspace(4)* inreg %arg4, [17 x <4 x i32>] addrspace(4)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
main_body:
- %tmp = getelementptr [2 x <4 x i32>], [2 x <4 x i32>] addrspace(2)* %arg3, i64 0, i32 1
- %tmp10 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr [2 x <4 x i32>], [2 x <4 x i32>] addrspace(4)* %arg3, i64 0, i32 1
+ %tmp10 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp11 = shl i32 %arg6, 2
%tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
%tmp13 = bitcast i32 %tmp12 to float
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll Tue Feb 13 10:00:25 2018
@@ -7,13 +7,13 @@
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
- %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
- %header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
- %value = load i32, i32 addrspace(2)* %header_ptr
+ %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
+ %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
+ %value = load i32, i32 addrspace(4)* %header_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
-declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
attributes #0 = { readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll Tue Feb 13 10:00:25 2018
@@ -2,23 +2,23 @@
; ERROR: in function test_kernel{{.*}}: non-hsa intrinsic with hsa target
define amdgpu_kernel void @test_kernel(i32 addrspace(1)* %out) #1 {
- %implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
- %header_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
- %value = load i32, i32 addrspace(2)* %header_ptr
+ %implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
+ %header_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
+ %value = load i32, i32 addrspace(4)* %header_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
; ERROR: in function test_func{{.*}}: non-hsa intrinsic with hsa target
define void @test_func(i32 addrspace(1)* %out) #1 {
- %implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
- %header_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
- %value = load i32, i32 addrspace(2)* %header_ptr
+ %implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
+ %header_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
+ %value = load i32, i32 addrspace(4)* %header_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
-declare i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #0
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll Tue Feb 13 10:00:25 2018
@@ -10,9 +10,9 @@
define amdgpu_ps i32 @test_ps() #1 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
- %implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
- %buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
- %value = load volatile i32, i32 addrspace(2)* %buffer_ptr
+ %implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
+ %buffer_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
+ %value = load volatile i32, i32 addrspace(4)* %buffer_ptr
ret i32 %value
}
@@ -23,13 +23,13 @@ define amdgpu_ps i32 @test_ps() #1 {
define amdgpu_cs i32 @test_cs() #1 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
- %implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
- %buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
- %value = load volatile i32, i32 addrspace(2)* %buffer_ptr
+ %implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
+ %buffer_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
+ %value = load volatile i32, i32 addrspace(4)* %buffer_ptr
ret i32 %value
}
-declare i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #0
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll Tue Feb 13 10:00:25 2018
@@ -11,9 +11,9 @@
; HSA: s_load_dword s0, s[4:5], 0x0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
- %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
- %cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
- %load = load volatile i32, i32 addrspace(2)* %cast
+ %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %load = load volatile i32, i32 addrspace(4)* %cast
ret void
}
@@ -26,9 +26,9 @@ define amdgpu_kernel void @kernel_implic
; HSA: s_load_dword s0, s[4:5], 0x1c
define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
- %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
- %cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
- %load = load volatile i32, i32 addrspace(2)* %cast
+ %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %load = load volatile i32, i32 addrspace(4)* %cast
ret void
}
@@ -38,9 +38,9 @@ define amdgpu_kernel void @kernel_implic
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @func_implicitarg_ptr() #1 {
- %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
- %cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
- %load = load volatile i32, i32 addrspace(2)* %cast
+ %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %load = load volatile i32, i32 addrspace(4)* %cast
ret void
}
@@ -86,12 +86,12 @@ define void @func_call_implicitarg_ptr_f
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0{{$}}
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0{{$}}
define void @func_kernarg_implicitarg_ptr() #1 {
- %kernarg.segment.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
- %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
- %cast.kernarg.segment.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
- %cast.implicitarg = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
- %load0 = load volatile i32, i32 addrspace(2)* %cast.kernarg.segment.ptr
- %load1 = load volatile i32, i32 addrspace(2)* %cast.implicitarg
+ %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+ %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
+ %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
+ %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
ret void
}
@@ -106,8 +106,8 @@ define amdgpu_kernel void @kernel_call_k
ret void
}
-declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #2
-declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #2
+declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
+declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
attributes #0 = { nounwind noinline }
attributes #1 = { nounwind noinline }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll Tue Feb 13 10:00:25 2018
@@ -11,10 +11,10 @@
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 {
- %kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
- %header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
- %gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
- %value = load i32, i32 addrspace(2)* %gep
+ %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+ %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
+ %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
+ %value = load i32, i32 addrspace(4)* %gep
store i32 %value, i32 addrspace(1)* %out
ret void
}
@@ -23,10 +23,10 @@ define amdgpu_kernel void @test(i32 addr
; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
- %implicitarg.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
- %header.ptr = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
- %gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
- %value = load i32, i32 addrspace(2)* %gep
+ %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %header.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
+ %value = load i32, i32 addrspace(4)* %gep
store i32 %value, i32 addrspace(1)* %out
ret void
}
@@ -42,9 +42,9 @@ define amdgpu_kernel void @test_implicit
; MESA: buffer_store_dword [[V_VAL]]
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #1 {
- %implicitarg.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
- %arg.ptr = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
- %val = load i32, i32 addrspace(2)* %arg.ptr
+ %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %val = load i32, i32 addrspace(4)* %arg.ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}
@@ -53,16 +53,16 @@ define amdgpu_kernel void @test_implicit
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: s_load_dword s{{[0-9]+}}, s[4:5]
define amdgpu_kernel void @test_no_kernargs() #1 {
- %kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
- %header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
- %gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
- %value = load i32, i32 addrspace(2)* %gep
+ %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+ %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
+ %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
+ %value = load i32, i32 addrspace(4)* %gep
store volatile i32 %value, i32 addrspace(1)* undef
ret void
}
-declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
-declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
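(Aside, not part of the patch: a minimal sketch of the offset arithmetic behind the 0x15 check in test_implicit above, assuming the OS-UNKNOWN kernarg layout of 36 prepended implicit bytes plus the 8-byte %out pointer argument.)

  ; 36 prepended implicit bytes = 9 dwords; %out pointer = 8 bytes = 2 dwords;
  ; the gep index of 10 adds 10 more dwords, so the load lands at dword
  ; 9 + 2 + 10 = 21 = 0x15 from the kernarg segment base, which is the
  ; s_load_dword offset the OS-UNKNOWN check line expects.
  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %header.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
  %value = load i32, i32 addrspace(4)* %gep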
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll Tue Feb 13 10:00:25 2018
@@ -7,13 +7,13 @@
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
- %queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
- %header_ptr = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
- %value = load i32, i32 addrspace(2)* %header_ptr
+ %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
+ %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
+ %value = load i32, i32 addrspace(4)* %header_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
-declare noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
+declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.memcpy.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.memcpy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.memcpy.ll Tue Feb 13 10:00:25 2018
@@ -3,7 +3,7 @@
declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i1) nounwind
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
-declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(4)* nocapture, i64, i1) nounwind
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
@@ -328,8 +328,8 @@ define amdgpu_kernel void @test_small_me
}
; Test shouldConvertConstantLoadToIntImm
-@hello.align4 = private unnamed_addr addrspace(2) constant [16 x i8] c"constant string\00", align 4
-@hello.align1 = private unnamed_addr addrspace(2) constant [16 x i8] c"constant string\00", align 1
+@hello.align4 = private unnamed_addr addrspace(4) constant [16 x i8] c"constant string\00", align 4
+@hello.align1 = private unnamed_addr addrspace(4) constant [16 x i8] c"constant string\00", align 1
; FUNC-LABEL: {{^}}test_memcpy_const_string_align4:
; SI: s_getpc_b64
@@ -341,8 +341,8 @@ define amdgpu_kernel void @test_small_me
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind {
- %str = bitcast [16 x i8] addrspace(2)* @hello.align4 to i8 addrspace(2)*
- call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(2)* align 4 %str, i64 32, i1 false)
+ %str = bitcast [16 x i8] addrspace(4)* @hello.align4 to i8 addrspace(4)*
+ call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(4)* align 4 %str, i64 32, i1 false)
ret void
}
@@ -366,7 +366,7 @@ define amdgpu_kernel void @test_memcpy_c
; SI: buffer_store_byte
; SI: buffer_store_byte
define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind {
- %str = bitcast [16 x i8] addrspace(2)* @hello.align1 to i8 addrspace(2)*
- call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i1 false)
+ %str = bitcast [16 x i8] addrspace(4)* @hello.align1 to i8 addrspace(4)*
+ call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(4)* %str, i64 32, i1 false)
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-constant-f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-f64.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-f64.ll Tue Feb 13 10:00:25 2018
@@ -6,8 +6,8 @@
; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
; GCN-NOHSA: buffer_store_dwordx2
; GCN-HSA: flat_store_dwordx2
-define amdgpu_kernel void @constant_load_f64(double addrspace(1)* %out, double addrspace(2)* %in) #0 {
- %ld = load double, double addrspace(2)* %in
+define amdgpu_kernel void @constant_load_f64(double addrspace(1)* %out, double addrspace(4)* %in) #0 {
+ %ld = load double, double addrspace(4)* %in
store double %ld, double addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-i1.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i1.ll Tue Feb 13 10:00:25 2018
@@ -9,57 +9,57 @@
; EG: VTX_READ_8
; EG: AND_INT
-define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
- %load = load i1, i1 addrspace(2)* %in
+define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
+ %load = load i1, i1 addrspace(4)* %in
store i1 %load, i1 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v2i1:
-define amdgpu_kernel void @constant_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(4)* %in
store <2 x i1> %load, <2 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v3i1:
-define amdgpu_kernel void @constant_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(4)* %in
store <3 x i1> %load, <3 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v4i1:
-define amdgpu_kernel void @constant_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(4)* %in
store <4 x i1> %load, <4 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v8i1:
-define amdgpu_kernel void @constant_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(4)* %in
store <8 x i1> %load, <8 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v16i1:
-define amdgpu_kernel void @constant_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(4)* %in
store <16 x i1> %load, <16 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v32i1:
-define amdgpu_kernel void @constant_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(4)* %in
store <32 x i1> %load, <32 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v64i1:
-define amdgpu_kernel void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(4)* %in
store <64 x i1> %load, <64 x i1> addrspace(1)* %out
ret void
}
@@ -67,8 +67,8 @@ define amdgpu_kernel void @constant_load
; FUNC-LABEL: {{^}}constant_zextload_i1_to_i32:
; GCN: buffer_load_ubyte
; GCN: buffer_store_dword
-define amdgpu_kernel void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
- %a = load i1, i1 addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
+ %a = load i1, i1 addrspace(4)* %in
%ext = zext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -81,136 +81,136 @@ define amdgpu_kernel void @constant_zext
; EG: VTX_READ_8
; EG: BFE_INT
-define amdgpu_kernel void @constant_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
- %a = load i1, i1 addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
+ %a = load i1, i1 addrspace(4)* %in
%ext = sext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i32:
-define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(4)* %in
%ext = zext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i32:
-define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(4)* %in
%ext = sext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i32:
-define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(4)* %in
%ext = zext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i32:
-define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(4)* %in
%ext = sext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i32:
-define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(4)* %in
%ext = zext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i32:
-define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(4)* %in
%ext = sext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i32:
-define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(4)* %in
%ext = zext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i32:
-define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(4)* %in
%ext = sext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i32:
-define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(4)* %in
%ext = zext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i32:
-define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(4)* %in
%ext = sext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i32:
-define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(4)* %in
%ext = zext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i32:
-define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(4)* %in
%ext = sext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i32:
-define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(4)* %in
%ext = zext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i32:
-define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(4)* %in
%ext = sext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i32:
-define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(4)* %in
%ext = zext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i32:
-define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(4)* %in
%ext = sext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@@ -221,8 +221,8 @@ define amdgpu_kernel void @constant_sext
; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
- %a = load i1, i1 addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
+ %a = load i1, i1 addrspace(4)* %in
%ext = zext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -233,136 +233,136 @@ define amdgpu_kernel void @constant_zext
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @constant_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
- %a = load i1, i1 addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
+ %a = load i1, i1 addrspace(4)* %in
%ext = sext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i64:
-define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(4)* %in
%ext = zext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i64:
-define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(4)* %in
%ext = sext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i64:
-define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(4)* %in
%ext = zext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i64:
-define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(4)* %in
%ext = sext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i64:
-define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(4)* %in
%ext = zext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i64:
-define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(4)* %in
%ext = sext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i64:
-define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(4)* %in
%ext = zext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i64:
-define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(4)* %in
%ext = sext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i64:
-define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(4)* %in
%ext = zext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i64:
-define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(4)* %in
%ext = sext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i64:
-define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(4)* %in
%ext = zext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i64:
-define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(4)* %in
%ext = sext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i64:
-define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(4)* %in
%ext = zext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i64:
-define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(4)* %in
%ext = sext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i64:
-define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(4)* %in
%ext = zext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i64:
-define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
- %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(4)* %in
%ext = sext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-i16.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i16.ll Tue Feb 13 10:00:25 2018
@@ -8,9 +8,9 @@
; GCN-HSA: flat_load_ushort
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
+define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
entry:
- %ld = load i16, i16 addrspace(2)* %in
+ %ld = load i16, i16 addrspace(4)* %in
store i16 %ld, i16 addrspace(1)* %out
ret void
}
@@ -19,9 +19,9 @@ entry:
; GCN: s_load_dword s
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) {
+define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) {
entry:
- %ld = load <2 x i16>, <2 x i16> addrspace(2)* %in
+ %ld = load <2 x i16>, <2 x i16> addrspace(4)* %in
store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
ret void
}
@@ -31,9 +31,9 @@ entry:
; EG-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1
-define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
+define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
entry:
- %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
+ %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
ret void
}
@@ -42,9 +42,9 @@ entry:
; GCN: s_load_dwordx2
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) {
+define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) {
entry:
- %ld = load <4 x i16>, <4 x i16> addrspace(2)* %in
+ %ld = load <4 x i16>, <4 x i16> addrspace(4)* %in
store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
ret void
}
@@ -53,9 +53,9 @@ entry:
; GCN: s_load_dwordx4
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) {
+define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) {
entry:
- %ld = load <8 x i16>, <8 x i16> addrspace(2)* %in
+ %ld = load <8 x i16>, <8 x i16> addrspace(4)* %in
store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
ret void
}
@@ -65,9 +65,9 @@ entry:
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) {
+define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) {
entry:
- %ld = load <16 x i16>, <16 x i16> addrspace(2)* %in
+ %ld = load <16 x i16>, <16 x i16> addrspace(4)* %in
store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
ret void
}
@@ -80,8 +80,8 @@ entry:
; GCN-HSA: flat_store_dword
; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
-define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
- %a = load i16, i16 addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %a = load i16, i16 addrspace(4)* %in
%ext = zext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -97,8 +97,8 @@ define amdgpu_kernel void @constant_zext
; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
-define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
- %a = load i16, i16 addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %a = load i16, i16 addrspace(4)* %in
%ext = sext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -109,8 +109,8 @@ define amdgpu_kernel void @constant_sext
; GCN-HSA: flat_load_ushort
; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
-define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
- %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
%ext = zext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -123,8 +123,8 @@ define amdgpu_kernel void @constant_zext
; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
-define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
- %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
%ext = sext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -140,8 +140,8 @@ define amdgpu_kernel void @constant_sext
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], literal
; EG: 16
; EG: 16
-define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
- %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
%ext = zext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -160,8 +160,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{PV\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
-define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
- %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
%ext = sext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -183,9 +183,9 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 65535
; EG-DAG: 65535
-define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
+define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
entry:
- %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
+ %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
%ext = zext <3 x i16> %ld to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
@@ -204,9 +204,9 @@ entry:
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
-define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
+define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
entry:
- %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
+ %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
%ext = sext <3 x i16> %ld to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
@@ -229,8 +229,8 @@ entry:
; EG-DAG: AND_INT {{[* ]*}}[[ST]].Z, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 65535
; EG-DAG: 65535
-define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
- %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
%ext = zext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -254,8 +254,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
-define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
- %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
%ext = sext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -288,8 +288,8 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: 65535
; EG-DAG: 65535
; EG-DAG: 65535
-define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
- %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
%ext = zext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -322,8 +322,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
-define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
- %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
%ext = sext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -337,8 +337,8 @@ define amdgpu_kernel void @constant_sext
; v16i16 is naturally 32 byte aligned
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 16, #1
-define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
- %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
%ext = zext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -352,8 +352,8 @@ define amdgpu_kernel void @constant_zext
; v16i16 is naturally 32 byte aligned
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 16, #1
-define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
- %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
%ext = sext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -369,8 +369,8 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
-define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
- %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
%ext = zext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@@ -385,8 +385,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
-define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
- %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
%ext = sext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@@ -404,8 +404,8 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
-define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
- %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
+ %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
%ext = zext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@@ -421,8 +421,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
-define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
- %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
+ %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
%ext = sext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@@ -438,8 +438,8 @@ define amdgpu_kernel void @constant_sext
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
-define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
- %a = load i16, i16 addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %a = load i16, i16 addrspace(4)* %in
%ext = zext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -464,8 +464,8 @@ define amdgpu_kernel void @constant_zext
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EG: 31
-define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
- %a = load i16, i16 addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %a = load i16, i16 addrspace(4)* %in
%ext = sext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -475,8 +475,8 @@ define amdgpu_kernel void @constant_sext
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
-define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
- %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
%ext = zext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -488,8 +488,8 @@ define amdgpu_kernel void @constant_zext
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EG: 31
-define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
- %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
%ext = sext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -498,8 +498,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
- %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
%ext = zext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -508,8 +508,8 @@ define amdgpu_kernel void @constant_zext
; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
- %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
%ext = sext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -518,8 +518,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
- %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
%ext = zext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -528,8 +528,8 @@ define amdgpu_kernel void @constant_zext
; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
- %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
%ext = sext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -538,8 +538,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
- %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
%ext = zext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -548,8 +548,8 @@ define amdgpu_kernel void @constant_zext
; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
- %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
%ext = sext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -559,8 +559,8 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
- %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
%ext = zext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -570,8 +570,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
- %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
%ext = sext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -583,8 +583,8 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
-define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
- %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
%ext = zext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@@ -596,8 +596,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
-define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
- %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
%ext = sext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@@ -606,16 +606,16 @@ define amdgpu_kernel void @constant_sext
; These trigger undefined register machine verifier errors
; ; XFUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i64:
-; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
-; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
+; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
; %ext = zext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
; }
; ; XFUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i64:
-; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
-; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
+; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
; %ext = sext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll Tue Feb 13 10:00:25 2018
@@ -7,9 +7,9 @@
; GCN: s_load_dword s{{[0-9]+}}
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-define amdgpu_kernel void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
entry:
- %ld = load i32, i32 addrspace(2)* %in
+ %ld = load i32, i32 addrspace(4)* %in
store i32 %ld, i32 addrspace(1)* %out
ret void
}
@@ -18,9 +18,9 @@ entry:
; GCN: s_load_dwordx2
; EG: VTX_READ_64
-define amdgpu_kernel void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
entry:
- %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
+ %ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
ret void
}
@@ -29,9 +29,9 @@ entry:
; GCN: s_load_dwordx4
; EG: VTX_READ_128
-define amdgpu_kernel void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(4)* %in) #0 {
entry:
- %ld = load <3 x i32>, <3 x i32> addrspace(2)* %in
+ %ld = load <3 x i32>, <3 x i32> addrspace(4)* %in
store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
ret void
}
@@ -40,9 +40,9 @@ entry:
; GCN: s_load_dwordx4
; EG: VTX_READ_128
-define amdgpu_kernel void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
entry:
- %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
+ %ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
ret void
}
@@ -52,9 +52,9 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
-define amdgpu_kernel void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
entry:
- %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
+ %ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
ret void
}
@@ -66,9 +66,9 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
-define amdgpu_kernel void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
entry:
- %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
+ %ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
ret void
}
@@ -81,8 +81,8 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG: CF_END
; EG: VTX_READ_32
-define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
- %ld = load i32, i32 addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
+ %ld = load i32, i32 addrspace(4)* %in
%ext = zext i32 %ld to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -98,8 +98,8 @@ define amdgpu_kernel void @constant_zext
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.
; EG: 31
-define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
- %ld = load i32, i32 addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
+ %ld = load i32, i32 addrspace(4)* %in
%ext = sext i32 %ld to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -108,8 +108,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
; GCN: s_load_dword
; GCN: store_dwordx2
-define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
- %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
+ %ld = load <1 x i32>, <1 x i32> addrspace(4)* %in
%ext = zext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -119,8 +119,8 @@ define amdgpu_kernel void @constant_zext
; GCN: s_load_dword s[[LO:[0-9]+]]
; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
; GCN: store_dwordx2
-define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
- %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
+ %ld = load <1 x i32>, <1 x i32> addrspace(4)* %in
%ext = sext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -129,8 +129,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: store_dwordx4
-define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
- %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
+ %ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
%ext = zext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -143,8 +143,8 @@ define amdgpu_kernel void @constant_zext
; GCN-DAG: s_ashr_i32
; GCN: store_dwordx4
-define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
- %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
+ %ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
%ext = sext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -155,8 +155,8 @@ define amdgpu_kernel void @constant_sext
; GCN: store_dwordx4
; GCN: store_dwordx4
-define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
- %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
+ %ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
%ext = zext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -172,8 +172,8 @@ define amdgpu_kernel void @constant_zext
; GCN: store_dwordx4
; GCN: store_dwordx4
-define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
- %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
+ %ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
%ext = sext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -191,8 +191,8 @@ define amdgpu_kernel void @constant_sext
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
-define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
- %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
+ %ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
%ext = zext <8 x i32> %ld to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -219,8 +219,8 @@ define amdgpu_kernel void @constant_zext
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
-define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
- %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
+ %ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
%ext = sext <8 x i32> %ld to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -240,8 +240,8 @@ define amdgpu_kernel void @constant_sext
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
-define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
- %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
+ %ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
%ext = sext <16 x i32> %ld to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -267,8 +267,8 @@ define amdgpu_kernel void @constant_sext
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
-define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
- %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
+ %ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
%ext = zext <16 x i32> %ld to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -319,8 +319,8 @@ define amdgpu_kernel void @constant_zext
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
-define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
- %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
+ %ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
%ext = sext <32 x i32> %ld to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@@ -370,8 +370,8 @@ define amdgpu_kernel void @constant_sext
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
-define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
- %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
+ %ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
%ext = zext <32 x i32> %ld to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-i64.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i64.ll Tue Feb 13 10:00:25 2018
@@ -7,8 +7,8 @@
; FUNC-LABEL: {{^}}constant_load_i64:
; GCN: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; EG: VTX_READ_64
-define amdgpu_kernel void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspace(2)* %in) #0 {
- %ld = load i64, i64 addrspace(2)* %in
+define amdgpu_kernel void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspace(4)* %in) #0 {
+ %ld = load i64, i64 addrspace(4)* %in
store i64 %ld, i64 addrspace(1)* %out
ret void
}
@@ -17,9 +17,9 @@ define amdgpu_kernel void @constant_load
; GCN: s_load_dwordx4
; EG: VTX_READ_128
-define amdgpu_kernel void @constant_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(4)* %in) #0 {
entry:
- %ld = load <2 x i64>, <2 x i64> addrspace(2)* %in
+ %ld = load <2 x i64>, <2 x i64> addrspace(4)* %in
store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
ret void
}
@@ -29,9 +29,9 @@ entry:
; EG-DAG: VTX_READ_128
; EG-DAG: VTX_READ_128
-define amdgpu_kernel void @constant_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(4)* %in) #0 {
entry:
- %ld = load <3 x i64>, <3 x i64> addrspace(2)* %in
+ %ld = load <3 x i64>, <3 x i64> addrspace(4)* %in
store <3 x i64> %ld, <3 x i64> addrspace(1)* %out
ret void
}
@@ -41,9 +41,9 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
-define amdgpu_kernel void @constant_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(4)* %in) #0 {
entry:
- %ld = load <4 x i64>, <4 x i64> addrspace(2)* %in
+ %ld = load <4 x i64>, <4 x i64> addrspace(4)* %in
store <4 x i64> %ld, <4 x i64> addrspace(1)* %out
ret void
}
@@ -55,9 +55,9 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
-define amdgpu_kernel void @constant_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(4)* %in) #0 {
entry:
- %ld = load <8 x i64>, <8 x i64> addrspace(2)* %in
+ %ld = load <8 x i64>, <8 x i64> addrspace(4)* %in
store <8 x i64> %ld, <8 x i64> addrspace(1)* %out
ret void
}
@@ -74,9 +74,9 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
-define amdgpu_kernel void @constant_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(4)* %in) #0 {
entry:
- %ld = load <16 x i64>, <16 x i64> addrspace(2)* %in
+ %ld = load <16 x i64>, <16 x i64> addrspace(4)* %in
store <16 x i64> %ld, <16 x i64> addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll Tue Feb 13 10:00:25 2018
@@ -10,9 +10,9 @@
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: NOT AND
-define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
entry:
- %ld = load i8, i8 addrspace(2)* %in
+ %ld = load i8, i8 addrspace(4)* %in
store i8 %ld, i8 addrspace(1)* %out
ret void
}
@@ -22,9 +22,9 @@ entry:
; GCN-HSA: flat_load_ushort v
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
entry:
- %ld = load <2 x i8>, <2 x i8> addrspace(2)* %in
+ %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
ret void
}
@@ -33,9 +33,9 @@ entry:
; GCN: s_load_dword s
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
- %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
+ %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
ret void
}
@@ -44,9 +44,9 @@ entry:
; GCN: s_load_dword s
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
entry:
- %ld = load <4 x i8>, <4 x i8> addrspace(2)* %in
+ %ld = load <4 x i8>, <4 x i8> addrspace(4)* %in
store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
ret void
}
@@ -55,9 +55,9 @@ entry:
; GCN: s_load_dwordx2
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
entry:
- %ld = load <8 x i8>, <8 x i8> addrspace(2)* %in
+ %ld = load <8 x i8>, <8 x i8> addrspace(4)* %in
store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
ret void
}
@@ -66,9 +66,9 @@ entry:
; GCN: s_load_dwordx4
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
entry:
- %ld = load <16 x i8>, <16 x i8> addrspace(2)* %in
+ %ld = load <16 x i8>, <16 x i8> addrspace(4)* %in
store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
ret void
}
@@ -78,8 +78,8 @@ entry:
; GCN-HSA: flat_load_ubyte
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
- %a = load i8, i8 addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
+ %a = load i8, i8 addrspace(4)* %in
%ext = zext i8 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -92,8 +92,8 @@ define amdgpu_kernel void @constant_zext
; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
-define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
- %ld = load i8, i8 addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
+ %ld = load i8, i8 addrspace(4)* %in
%ext = sext i8 %ld to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -102,8 +102,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
- %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = zext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -114,8 +114,8 @@ define amdgpu_kernel void @constant_zext
; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
-define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
- %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = sext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -129,8 +129,8 @@ define amdgpu_kernel void @constant_sext
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG: 8
-define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
- %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = zext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -150,8 +150,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
- %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = sext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -170,9 +170,9 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
- %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
+ %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
%ext = zext <3 x i8> %ld to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
@@ -193,9 +193,9 @@ entry:
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
- %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
+ %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
%ext = sext <3 x i8> %ld to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
@@ -214,8 +214,8 @@ entry:
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
- %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = zext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -236,8 +236,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
- %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = sext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -264,8 +264,8 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
- %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = zext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -294,8 +294,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
- %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = sext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -335,8 +335,8 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
- %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -378,8 +378,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
- %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -450,8 +450,8 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
- %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = zext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@@ -526,8 +526,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
- %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = sext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@@ -539,8 +539,8 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
-define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
- %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
+ %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
%ext = zext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@@ -552,8 +552,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
-define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
- %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
+ %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
%ext = sext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@@ -570,8 +570,8 @@ define amdgpu_kernel void @constant_sext
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
-define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
- %a = load i8, i8 addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
+ %a = load i8, i8 addrspace(4)* %in
%ext = zext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -589,8 +589,8 @@ define amdgpu_kernel void @constant_zext
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
-define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
- %a = load i8, i8 addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
+ %a = load i8, i8 addrspace(4)* %in
%ext = sext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -600,8 +600,8 @@ define amdgpu_kernel void @constant_sext
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
-define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
- %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = zext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -613,8 +613,8 @@ define amdgpu_kernel void @constant_zext
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
-define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
- %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = sext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -623,8 +623,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
- %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = zext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -633,8 +633,8 @@ define amdgpu_kernel void @constant_zext
; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
- %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = sext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -643,8 +643,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
- %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = zext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -653,8 +653,8 @@ define amdgpu_kernel void @constant_zext
; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
- %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = sext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -663,8 +663,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
- %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -673,8 +673,8 @@ define amdgpu_kernel void @constant_zext
; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
- %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -683,8 +683,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
- %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = zext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -693,8 +693,8 @@ define amdgpu_kernel void @constant_zext
; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
- %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = sext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -704,8 +704,8 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
- %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = zext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@@ -715,24 +715,24 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
- %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = sext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
}
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
-; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
-; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
; %ext = zext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
-; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
-; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
; %ext = sext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
@@ -744,8 +744,8 @@ define amdgpu_kernel void @constant_sext
; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
-define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
- %a = load i8, i8 addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
+ %a = load i8, i8 addrspace(4)* %in
%ext = zext i8 %a to i16
store i16 %ext, i16 addrspace(1)* %out
ret void
@@ -759,16 +759,16 @@ define amdgpu_kernel void @constant_zext
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
- %a = load i8, i8 addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
+ %a = load i8, i8 addrspace(4)* %in
%ext = sext i8 %a to i16
store i16 %ext, i16 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
-define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
- %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = zext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
ret void
@@ -778,8 +778,8 @@ define amdgpu_kernel void @constant_zext
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
- %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = sext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
ret void
@@ -788,8 +788,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
- %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = zext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
ret void
@@ -800,8 +800,8 @@ define amdgpu_kernel void @constant_zext
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
- %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = sext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
ret void
@@ -810,8 +810,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
- %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = zext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
ret void
@@ -824,8 +824,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
- %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = sext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
ret void
@@ -834,8 +834,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
- %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = zext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
ret void
@@ -853,8 +853,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
- %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = sext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
ret void
@@ -863,8 +863,8 @@ define amdgpu_kernel void @constant_sext
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
- %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = zext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
ret void
@@ -889,8 +889,8 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
- %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = sext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
ret void
@@ -900,8 +900,8 @@ define amdgpu_kernel void @constant_sext
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
- %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = zext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
ret void
@@ -943,24 +943,24 @@ define amdgpu_kernel void @constant_zext
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
- %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = sext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
ret void
}
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
-; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
-; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
; %ext = zext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
-; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
-; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
; %ext = sext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
; ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll Tue Feb 13 10:00:25 2018
@@ -473,10 +473,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: flat_load_ushort
-define void @load_constant_hi_v2i16_reglo_vreg(i16 addrspace(2)* %in, i16 %reg) #0 {
+define void @load_constant_hi_v2i16_reglo_vreg(i16 addrspace(4)* %in, i16 %reg) #0 {
entry:
- %gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 -2047
- %load = load i16, i16 addrspace(2)* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047
+ %load = load i16, i16 addrspace(4)* %gep
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@@ -492,10 +492,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: flat_load_ushort
-define void @load_constant_hi_v2f16_reglo_vreg(half addrspace(2)* %in, half %reg) #0 {
+define void @load_constant_hi_v2f16_reglo_vreg(half addrspace(4)* %in, half %reg) #0 {
entry:
- %gep = getelementptr inbounds half, half addrspace(2)* %in, i64 -2047
- %load = load half, half addrspace(2)* %gep
+ %gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047
+ %load = load half, half addrspace(4)* %gep
%build0 = insertelement <2 x half> undef, half %reg, i32 0
%build1 = insertelement <2 x half> %build0, half %load, i32 1
store <2 x half> %build1, <2 x half> addrspace(1)* undef
@@ -625,11 +625,11 @@ entry:
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_setpc_b64
-define <2 x i16> @load_constant_v2i16_split(i16 addrspace(2)* %in) #0 {
+define <2 x i16> @load_constant_v2i16_split(i16 addrspace(4)* %in) #0 {
entry:
- %gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 1
- %load0 = load volatile i16, i16 addrspace(2)* %in
- %load1 = load volatile i16, i16 addrspace(2)* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 1
+ %load0 = load volatile i16, i16 addrspace(4)* %in
+ %load1 = load volatile i16, i16 addrspace(4)* %gep
%build0 = insertelement <2 x i16> undef, i16 %load0, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load1, i32 1
ret <2 x i16> %build1
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll Tue Feb 13 10:00:25 2018
@@ -559,11 +559,11 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: flat_load_ushort
-define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(2)* %in, i32 %reg) #0 {
+define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
- %gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 -2047
- %load = load i16, i16 addrspace(2)* %gep
+ %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047
+ %load = load i16, i16 addrspace(4)* %gep
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@@ -578,11 +578,11 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: flat_load_ushort
-define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(2)* %in, i32 %reg) #0 {
+define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(4)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
- %gep = getelementptr inbounds half, half addrspace(2)* %in, i64 -2047
- %load = load half, half addrspace(2)* %gep
+ %gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047
+ %load = load half, half addrspace(4)* %gep
%build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
store <2 x half> %build1, <2 x half> addrspace(1)* undef
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/mad24-get-global-id.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad24-get-global-id.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mad24-get-global-id.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mad24-get-global-id.ll Tue Feb 13 10:00:25 2018
@@ -5,17 +5,17 @@
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
; GCN-LABEL: {{^}}get_global_id_0:
; GCN: s_and_b32 [[WGSIZEX:s[0-9]+]], {{s[0-9]+}}, 0xffff
; GCN: v_mov_b32_e32 [[VWGSIZEX:v[0-9]+]], [[WGSIZEX]]
; GCN: v_mad_u32_u24 v{{[0-9]+}}, s8, [[VWGSIZEX]], v0
define amdgpu_kernel void @get_global_id_0(i32 addrspace(1)* %out) #1 {
- %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
- %cast.dispatch.ptr = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
- %gep = getelementptr inbounds i32, i32 addrspace(2)* %cast.dispatch.ptr, i64 1
- %workgroup.size.xy = load i32, i32 addrspace(2)* %gep, align 4, !invariant.load !0
+ %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+ %cast.dispatch.ptr = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
+ %gep = getelementptr inbounds i32, i32 addrspace(4)* %cast.dispatch.ptr, i64 1
+ %workgroup.size.xy = load i32, i32 addrspace(4)* %gep, align 4, !invariant.load !0
%workgroup.size.x = and i32 %workgroup.size.xy, 65535
%workitem.id.x = call i32 @llvm.amdgcn.workitem.id.x(), !range !1
Modified: llvm/trunk/test/CodeGen/AMDGPU/missing-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/missing-store.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/missing-store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/missing-store.ll Tue Feb 13 10:00:25 2018
@@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
-@ptr_load = addrspace(3) global i32 addrspace(2)* undef, align 8
+@ptr_load = addrspace(3) global i32 addrspace(4)* undef, align 8
; Make sure when the load from %ptr2 is folded the chain isn't lost,
; resulting in losing the store to gptr
@@ -16,11 +16,11 @@
; SI: buffer_store_dword
; SI: s_endpgm
define amdgpu_kernel void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
- %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
+ %ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @ptr_load, align 8
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 2
store i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll Tue Feb 13 10:00:25 2018
@@ -5,40 +5,40 @@
; CHECK-LABEL: {{^}}test_none:
; CHECK: buffer_load_format_x v0, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define amdgpu_vs float @test_none(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
+define amdgpu_vs float @test_none(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
main_body:
- %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
- %tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
+ %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
+ %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 0, i1 0, i1 0)
ret float %tmp7
}
; CHECK-LABEL: {{^}}test_idxen:
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen{{$}}
-define amdgpu_vs float @test_idxen(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
+define amdgpu_vs float @test_idxen(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
main_body:
- %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
- %tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
+ %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
+ %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i1 0, i1 0)
ret float %tmp7
}
; CHECK-LABEL: {{^}}test_offen:
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
-define amdgpu_vs float @test_offen(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
+define amdgpu_vs float @test_offen(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
main_body:
- %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
- %tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
+ %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
+ %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 undef, i1 0, i1 0)
ret float %tmp7
}
; CHECK-LABEL: {{^}}test_both:
; CHECK: buffer_load_format_x v0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen{{$}}
-define amdgpu_vs float @test_both(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
+define amdgpu_vs float @test_both(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
main_body:
- %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
- %tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
+ %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
+ %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i1 0, i1 0)
ret float %tmp7
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll Tue Feb 13 10:00:25 2018
@@ -55,10 +55,10 @@ entry:
; CHECK-LABEL: {{^}}soffset_max_imm:
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
-define amdgpu_gs void @soffset_max_imm([6 x <4 x i32>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
+define amdgpu_gs void @soffset_max_imm([6 x <4 x i32>] addrspace(4)* byval, [17 x <4 x i32>] addrspace(4)* byval, [16 x <4 x i32>] addrspace(4)* byval, [32 x <8 x i32>] addrspace(4)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
main_body:
- %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(2)* %0, i32 0, i32 0
- %tmp1 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp0
+ %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0
+ %tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0
%tmp2 = shl i32 %6, 2
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16
@@ -74,10 +74,10 @@ main_body:
; CHECK-LABEL: {{^}}soffset_no_fold:
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
-define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
+define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(4)* byval, [17 x <4 x i32>] addrspace(4)* byval, [16 x <4 x i32>] addrspace(4)* byval, [32 x <8 x i32>] addrspace(4)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
main_body:
- %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(2)* %0, i32 0, i32 0
- %tmp1 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp0
+ %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0
+ %tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0
%tmp2 = shl i32 %6, 2
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16
Modified: llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll Tue Feb 13 10:00:25 2018
@@ -642,12 +642,12 @@ uniform.multi.exit.region:
br i1 %uniform.cond0, label %uniform.if, label %uniform.ret1
uniform.if:
- %sgpr0 = load volatile i32, i32 addrspace(2)* undef
+ %sgpr0 = load volatile i32, i32 addrspace(4)* undef
%uniform.cond1 = icmp slt i32 %sgpr0, 1
br i1 %uniform.cond1, label %uniform.then, label %uniform.endif
uniform.then:
- %sgpr1 = load volatile i32, i32 addrspace(2)* undef
+ %sgpr1 = load volatile i32, i32 addrspace(4)* undef
%uniform.cond2 = icmp sge i32 %sgpr1, 4
store volatile i32 33, i32 addrspace(1)* undef
br i1 %uniform.cond2, label %uniform.ret0, label %uniform.endif
Modified: llvm/trunk/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll Tue Feb 13 10:00:25 2018
@@ -6,21 +6,21 @@
; EG: R_AMDGPU_ABS32 extern_const_addrspace
; CHECK-DAG: Name: extern_const_addrspace
-@extern_const_addrspace = external unnamed_addr addrspace(2) constant [5 x i32], align 4
+@extern_const_addrspace = external unnamed_addr addrspace(4) constant [5 x i32], align 4
; CHECK-DAG: Name: load_extern_const_init
define amdgpu_kernel void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
- %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
+ %val = load i32, i32 addrspace(4)* getelementptr ([5 x i32], [5 x i32] addrspace(4)* @extern_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
; CHECK-DAG: Name: undef_const_addrspace
-@undef_const_addrspace = unnamed_addr addrspace(2) constant [5 x i32] undef, align 4
+@undef_const_addrspace = unnamed_addr addrspace(4) constant [5 x i32] undef, align 4
; CHECK-DAG: Name: undef_const_addrspace
define amdgpu_kernel void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
- %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
+ %val = load i32, i32 addrspace(4)* getelementptr ([5 x i32], [5 x i32] addrspace(4)* @undef_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/no-shrink-extloads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/no-shrink-extloads.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/no-shrink-extloads.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/no-shrink-extloads.ll Tue Feb 13 10:00:25 2018
@@ -194,9 +194,9 @@ define amdgpu_kernel void @truncate_buff
; SI: s_load_dword [[LOAD:s[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0x0
; SI: s_waitcnt lgkmcnt(0)
; SI: s_and_b32 s{{[0-9]+}}, [[LOAD]], 0xffff
-define amdgpu_kernel void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+define amdgpu_kernel void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
- %val = load i32, i32 addrspace(2)* %in
+ %val = load i32, i32 addrspace(4)* %in
%mask = and i32 %val, 65535
store i32 %mask, i32 addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/nullptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/nullptr.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/nullptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/nullptr.ll Tue Feb 13 10:00:25 2018
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck -check-prefixes=CHECK,GCN %s
;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s
-%struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(2)*, i32 addrspace(3)*, i32*, i32 addrspace(4)*}
+%struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(4)*, i32 addrspace(3)*, i32*, i32 addrspace(2)*}
; CHECK-LABEL: nullptr_priv:
; CHECK-NEXT: .long 0
@@ -15,7 +15,7 @@
; CHECK-LABEL: nullptr_const:
; GCN-NEXT: .quad 0
; R600-NEXT: .long 0
-@nullptr_const = global i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)
+@nullptr_const = global i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*)
; CHECK-LABEL: nullptr_local:
; CHECK-NEXT: .long -1
@@ -23,7 +23,7 @@
; CHECK-LABEL: nullptr_region:
; CHECK-NEXT: .long -1
-@nullptr_region = global i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*)
+@nullptr_region = global i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)
; CHECK-LABEL: nullptr6:
; R600-NEXT: .long 0
@@ -113,7 +113,7 @@
@structWithPointers = addrspace(1) global %struct.S {
i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*),
i32 addrspace(1)* addrspacecast (i32* null to i32 addrspace(1)*),
- i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*),
+ i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*),
i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*),
i32* null,
- i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*)}, align 4
+ i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)}, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll Tue Feb 13 10:00:25 2018
@@ -8,9 +8,9 @@
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], [[VAL1]]
; GFX9: ; use [[PACKED]]
-define amdgpu_kernel void @s_pack_v2f16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
- %val0 = load volatile i32, i32 addrspace(2)* %in0
- %val1 = load volatile i32, i32 addrspace(2)* %in1
+define amdgpu_kernel void @s_pack_v2f16(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) #0 {
+ %val0 = load volatile i32, i32 addrspace(4)* %in0
+ %val1 = load volatile i32, i32 addrspace(4)* %in1
%lo.i = trunc i32 %val0 to i16
%hi.i = trunc i32 %val1 to i16
%lo = bitcast i16 %lo.i to half
@@ -27,8 +27,8 @@ define amdgpu_kernel void @s_pack_v2f16(
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], 0x1234, [[VAL1]]
; GFX9: ; use [[PACKED]]
-define amdgpu_kernel void @s_pack_v2f16_imm_lo(i32 addrspace(2)* %in1) #0 {
- %val1 = load i32, i32 addrspace(2)* %in1
+define amdgpu_kernel void @s_pack_v2f16_imm_lo(i32 addrspace(4)* %in1) #0 {
+ %val1 = load i32, i32 addrspace(4)* %in1
%hi.i = trunc i32 %val1 to i16
%hi = bitcast i16 %hi.i to half
%vec.0 = insertelement <2 x half> undef, half 0xH1234, i32 0
@@ -43,8 +43,8 @@ define amdgpu_kernel void @s_pack_v2f16_
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], 0x1234
; GFX9: ; use [[PACKED]]
-define amdgpu_kernel void @s_pack_v2f16_imm_hi(i32 addrspace(2)* %in0) #0 {
- %val0 = load i32, i32 addrspace(2)* %in0
+define amdgpu_kernel void @s_pack_v2f16_imm_hi(i32 addrspace(4)* %in0) #0 {
+ %val0 = load i32, i32 addrspace(4)* %in0
%lo.i = trunc i32 %val0 to i16
%lo = bitcast i16 %lo.i to half
%vec.0 = insertelement <2 x half> undef, half %lo, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll Tue Feb 13 10:00:25 2018
@@ -8,9 +8,9 @@
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], [[VAL1]]
; GFX9: ; use [[PACKED]]
-define amdgpu_kernel void @s_pack_v2i16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
- %val0 = load volatile i32, i32 addrspace(2)* %in0
- %val1 = load volatile i32, i32 addrspace(2)* %in1
+define amdgpu_kernel void @s_pack_v2i16(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) #0 {
+ %val0 = load volatile i32, i32 addrspace(4)* %in0
+ %val1 = load volatile i32, i32 addrspace(4)* %in1
%lo = trunc i32 %val0 to i16
%hi = trunc i32 %val1 to i16
%vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
@@ -25,8 +25,8 @@ define amdgpu_kernel void @s_pack_v2i16(
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], 0x1c8, [[VAL1]]
; GFX9: ; use [[PACKED]]
-define amdgpu_kernel void @s_pack_v2i16_imm_lo(i32 addrspace(2)* %in1) #0 {
- %val1 = load i32, i32 addrspace(2)* %in1
+define amdgpu_kernel void @s_pack_v2i16_imm_lo(i32 addrspace(4)* %in1) #0 {
+ %val1 = load i32, i32 addrspace(4)* %in1
%hi = trunc i32 %val1 to i16
%vec.0 = insertelement <2 x i16> undef, i16 456, i32 0
%vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1
@@ -40,8 +40,8 @@ define amdgpu_kernel void @s_pack_v2i16_
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], 0x1c8
; GFX9: ; use [[PACKED]]
-define amdgpu_kernel void @s_pack_v2i16_imm_hi(i32 addrspace(2)* %in0) #0 {
- %val0 = load i32, i32 addrspace(2)* %in0
+define amdgpu_kernel void @s_pack_v2i16_imm_hi(i32 addrspace(4)* %in0) #0 {
+ %val0 = load i32, i32 addrspace(4)* %in0
%lo = trunc i32 %val0 to i16
%vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
%vec.1 = insertelement <2 x i16> %vec.0, i16 456, i32 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll Tue Feb 13 10:00:25 2018
@@ -1,6 +1,6 @@
; RUN: llc -filetype=obj -march=r600 -mcpu=cypress -verify-machineinstrs < %s | llvm-readobj -relocations -symbols | FileCheck %s
-@arr = internal unnamed_addr addrspace(2) constant [4 x i32] [i32 4, i32 5, i32 6, i32 7], align 4
+@arr = internal unnamed_addr addrspace(4) constant [4 x i32] [i32 4, i32 5, i32 6, i32 7], align 4
; CHECK: Relocations [
; CHECK: Section (3) .rel.text {
@@ -19,8 +19,8 @@
; CHECK: }
define amdgpu_kernel void @test_constant_array_fixup(i32 addrspace(1)* nocapture %out, i32 %idx) #0 {
entry:
- %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(2)* @arr, i32 0, i32 %idx
- %val = load i32, i32 addrspace(2)* %arrayidx
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @arr, i32 0, i32 %idx
+ %val = load i32, i32 addrspace(4)* %arrayidx
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/readcyclecounter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/readcyclecounter.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/readcyclecounter.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/readcyclecounter.ll Tue Feb 13 10:00:25 2018
@@ -28,9 +28,9 @@ define amdgpu_kernel void @test_readcycl
; SI-DAG: s_memtime
; VI-DAG: s_memrealtime
; GCN-DAG: s_load_dword
-define amdgpu_cs i32 @test_readcyclecounter_smem(i64 addrspace(2)* inreg %in) #0 {
+define amdgpu_cs i32 @test_readcyclecounter_smem(i64 addrspace(4)* inreg %in) #0 {
%cycle0 = call i64 @llvm.readcyclecounter()
- %in.v = load i64, i64 addrspace(2)* %in
+ %in.v = load i64, i64 addrspace(4)* %in
%r.64 = add i64 %cycle0, %in.v
%r.32 = trunc i64 %r.64 to i32
ret i32 %r.32
Modified: llvm/trunk/test/CodeGen/AMDGPU/ret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ret.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ret.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ret.ll Tue Feb 13 10:00:25 2018
@@ -7,7 +7,7 @@
; GCN: s_waitcnt expcnt(0)
; GCN: v_add_f32_e32 v0, 1.0, v0
; GCN-NOT: s_endpgm
-define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
%x = fadd float %arg3, 1.000000e+00
@@ -26,7 +26,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
-define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
@@ -43,7 +43,7 @@ bb:
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
-define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@@ -68,7 +68,7 @@ bb:
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
-define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
+define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
ret float 1.000000e+00
}
@@ -82,7 +82,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
-define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
+define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
%f = bitcast <2 x i32> %arg8 to <2 x float>
%s = insertvalue { float, <2 x float> } undef, float %arg14, 0
@@ -101,7 +101,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
-define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@@ -130,7 +130,7 @@ bb:
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
-define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@@ -159,7 +159,7 @@ bb:
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
-define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@@ -181,7 +181,7 @@ bb:
; GCN: s_add_i32 s0, s3, 2
; GCN: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
-define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
%x = add i32 %arg2, 2
%a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
@@ -197,7 +197,7 @@ bb:
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
-define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
%x = add i32 %arg2, 2
ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
@@ -212,7 +212,7 @@ bb:
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
-define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
%v = fadd float %arg3, 1.000000e+00
@@ -235,7 +235,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN: s_waitcnt expcnt(0)
-define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
Modified: llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll Tue Feb 13 10:00:25 2018
@@ -65,24 +65,24 @@ done:
; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]
; GCN-NOHSA: buffer_store_dword [[V_OUT]]
; GCN-HSA: flat_store_dword {{.*}}, [[V_OUT]]
-define amdgpu_kernel void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @smrd_valu(i32 addrspace(4)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
entry:
%tmp = icmp ne i32 %a, 0
br i1 %tmp, label %if, label %else
if: ; preds = %entry
- %tmp1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
+ %tmp1 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %in
br label %endif
else: ; preds = %entry
- %tmp2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
- %tmp3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %tmp2
+ %tmp2 = getelementptr i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %in
+ %tmp3 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %tmp2
br label %endif
endif: ; preds = %else, %if
- %tmp4 = phi i32 addrspace(2)* [ %tmp1, %if ], [ %tmp3, %else ]
- %tmp5 = getelementptr i32, i32 addrspace(2)* %tmp4, i32 3000
- %tmp6 = load i32, i32 addrspace(2)* %tmp5
+ %tmp4 = phi i32 addrspace(4)* [ %tmp1, %if ], [ %tmp3, %else ]
+ %tmp5 = getelementptr i32, i32 addrspace(4)* %tmp4, i32 3000
+ %tmp6 = load i32, i32 addrspace(4)* %tmp5
store i32 %tmp6, i32 addrspace(1)* %out
ret void
}
@@ -93,12 +93,12 @@ endif:
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}}
; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 {
+define amdgpu_kernel void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(4)* %in) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
- %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
- %tmp3 = load i32, i32 addrspace(2)* %tmp2
+ %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(4)* %in, i32 %tmp, i32 4
+ %tmp3 = load i32, i32 addrspace(4)* %tmp2
store i32 %tmp3, i32 addrspace(1)* %out
ret void
}
@@ -113,12 +113,12 @@ entry:
; GCN-NOHSA: buffer_store_dword
; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}
-define amdgpu_kernel void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(4)* %in, i32 %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
- %tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp
- %tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000
- %tmp4 = load i32, i32 addrspace(2)* %tmp3
+ %tmp2 = getelementptr i32, i32 addrspace(4)* %in, i32 %tmp
+ %tmp3 = getelementptr i32, i32 addrspace(4)* %tmp2, i32 5000
+ %tmp4 = load i32, i32 addrspace(4)* %tmp3
%tmp5 = add i32 %tmp4, %c
store i32 %tmp5, i32 addrspace(1)* %out
ret void
@@ -133,12 +133,12 @@ entry:
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN-NOHSA: buffer_store_dwordx2
; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(4)* %in, i64 %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
- %tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp
- %tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000
- %tmp4 = load i64, i64 addrspace(2)* %tmp3
+ %tmp2 = getelementptr i64, i64 addrspace(4)* %in, i32 %tmp
+ %tmp3 = getelementptr i64, i64 addrspace(4)* %tmp2, i32 5000
+ %tmp4 = load i64, i64 addrspace(4)* %tmp3
%tmp5 = or i64 %tmp4, %c
store i64 %tmp5, i64 addrspace(1)* %out
ret void
@@ -155,12 +155,12 @@ entry:
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN-NOHSA: buffer_store_dwordx4
; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(4)* %in, <4 x i32> %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
- %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp
- %tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234
- %tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3
+ %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %in, i32 %tmp
+ %tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %tmp2, i32 1234
+ %tmp4 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp3
%tmp5 = or <4 x i32> %tmp4, %c
store <4 x i32> %tmp5, <4 x i32> addrspace(1)* %out
ret void
@@ -189,12 +189,12 @@ entry:
; GCN-NOHSA: buffer_store_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in, <8 x i32> %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
- %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp
- %tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234
- %tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3
+ %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %in, i32 %tmp
+ %tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %tmp2, i32 1234
+ %tmp4 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp3
%tmp5 = or <8 x i32> %tmp4, %c
store <8 x i32> %tmp5, <8 x i32> addrspace(1)* %out
ret void
@@ -230,12 +230,12 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN: s_endpgm
-define amdgpu_kernel void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in, <16 x i32> %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
- %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp
- %tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234
- %tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3
+ %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(4)* %in, i32 %tmp
+ %tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(4)* %tmp2, i32 1234
+ %tmp4 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp3
%tmp5 = or <16 x i32> %tmp4, %c
store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out
ret void
@@ -247,12 +247,12 @@ entry:
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
; GCN-NOHSA: buffer_store_dword [[ADD]]
; GCN-HSA: flat_store_dword {{.*}}, [[ADD]]
-define amdgpu_kernel void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
+define amdgpu_kernel void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(4)* %in, i32 %a) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
- %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
- %tmp3 = load i32, i32 addrspace(2)* %tmp2
+ %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(4)* %in, i32 %tmp, i32 4
+ %tmp3 = load i32, i32 addrspace(4)* %tmp2
%tmp4 = add i32 %tmp3, %a
store i32 %tmp4, i32 addrspace(1)* %out
ret void
@@ -261,12 +261,12 @@ entry:
; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
; GCN-HSA flat_load_dword v{{[0-9]}}, v{{[0-9]+:[0-9]+}}
-define amdgpu_kernel void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
+define amdgpu_kernel void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(4)* %in) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
- %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 255
- %tmp3 = load i32, i32 addrspace(2)* %tmp2
+ %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(4)* %in, i32 %tmp, i32 255
+ %tmp3 = load i32, i32 addrspace(4)* %tmp2
store i32 %tmp3, i32 addrspace(1)* %out
ret void
}
@@ -275,12 +275,12 @@ entry:
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}}
; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
+define amdgpu_kernel void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(4)* %in) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
- %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 256
- %tmp3 = load i32, i32 addrspace(2)* %tmp2
+ %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(4)* %in, i32 %tmp, i32 256
+ %tmp3 = load i32, i32 addrspace(4)* %tmp2
store i32 %tmp3, i32 addrspace(1)* %out
ret void
}
@@ -290,12 +290,12 @@ entry:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
- %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
- %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
- %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
+ %tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
+ %tmp2 = bitcast i32 addrspace(4)* %tmp1 to <8 x i32> addrspace(4)*
+ %tmp3 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp2, align 4
store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
ret void
}
@@ -313,12 +313,12 @@ entry:
; GCN-NOHSA: buffer_store_dword
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
- %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
- %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
- %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
+ %tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
+ %tmp2 = bitcast i32 addrspace(4)* %tmp1 to <8 x i32> addrspace(4)*
+ %tmp3 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp2, align 4
%elt0 = extractelement <8 x i32> %tmp3, i32 0
%elt1 = extractelement <8 x i32> %tmp3, i32 1
@@ -350,12 +350,12 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
- %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
- %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
- %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
+ %tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
+ %tmp2 = bitcast i32 addrspace(4)* %tmp1 to <16 x i32> addrspace(4)*
+ %tmp3 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp2, align 4
store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
ret void
}
@@ -385,12 +385,12 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
- %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
- %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
- %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
+ %tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
+ %tmp2 = bitcast i32 addrspace(4)* %tmp1 to <16 x i32> addrspace(4)*
+ %tmp3 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp2, align 4
%elt0 = extractelement <16 x i32> %tmp3, i32 0
%elt1 = extractelement <16 x i32> %tmp3, i32 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir Tue Feb 13 10:00:25 2018
@@ -15,11 +15,11 @@
bb:
%0 = getelementptr i32, i32 addrspace(1)* %arg1, i64 0, !amdgpu.uniform !3, !amdgpu.noclobber !3
%tmp5 = alloca %struct.wombat, align 16, addrspace(5)
- %1 = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
- %2 = bitcast i8 addrspace(2)* %1 to i32 addrspace(2)*
- %3 = getelementptr inbounds i32, i32 addrspace(2)* %2, i64 1
- %4 = bitcast i32 addrspace(2)* %3 to <2 x i32> addrspace(2)*, !amdgpu.uniform !3, !amdgpu.noclobber !3
- %5 = load <2 x i32>, <2 x i32> addrspace(2)* %4, align 4, !invariant.load !3
+ %1 = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+ %2 = bitcast i8 addrspace(4)* %1 to i32 addrspace(4)*
+ %3 = getelementptr inbounds i32, i32 addrspace(4)* %2, i64 1
+ %4 = bitcast i32 addrspace(4)* %3 to <2 x i32> addrspace(4)*, !amdgpu.uniform !3, !amdgpu.noclobber !3
+ %5 = load <2 x i32>, <2 x i32> addrspace(4)* %4, align 4, !invariant.load !3
%6 = extractelement <2 x i32> %5, i32 0
%7 = extractelement <2 x i32> %5, i32 1
%8 = lshr i32 %6, 16
@@ -32,7 +32,7 @@
%15 = add i32 %13, %14
%16 = add i32 %15, %11
%17 = getelementptr inbounds [256 x [16 x i8]], [256 x [16 x i8]] addrspace(3)* @sched_dbg_value_crash.tmp6, i32 0, i32 %16
- %tmp7 = load i64, i64 addrspace(2)* null, align 536870912
+ %tmp7 = load i64, i64 addrspace(4)* null, align 536870912
%tmp8 = tail call i32 @llvm.amdgcn.workitem.id.x() #3, !range !4
%tmp9 = zext i32 %tmp8 to i64
%tmp10 = add i64 %tmp7, %tmp9
@@ -141,7 +141,7 @@
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
- declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
+ declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
declare i32 @llvm.amdgcn.workitem.id.y() #1
declare i32 @llvm.amdgcn.workitem.id.z() #1
declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i32, i1) #0
@@ -199,9 +199,9 @@ body: |
%2:vgpr_32 = COPY $vgpr2
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
- %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
- %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
- %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0
%9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0
%10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll Tue Feb 13 10:00:25 2018
@@ -528,8 +528,8 @@ define amdgpu_kernel void @v_sext_in_reg
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
-define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
- %ld = load i32, i32 addrspace(2)* %ptr
+define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
+ %ld = load i32, i32 addrspace(4)* %ptr
%in = trunc i32 %ld to i16
%shl = shl i16 %in, 15
%sext = ashr i16 %shl, 15
@@ -547,8 +547,8 @@ define amdgpu_kernel void @s_sext_in_reg
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
-define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
- %ld = load i32, i32 addrspace(2)* %ptr
+define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
+ %ld = load i32, i32 addrspace(4)* %ptr
%in = trunc i32 %ld to i16
%shl = shl i16 %in, 14
%sext = ashr i16 %shl, 14
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll Tue Feb 13 10:00:25 2018
@@ -4,10 +4,10 @@
; CHECK-LABEL: {{^}}phi1:
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
-define amdgpu_ps void @phi1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @phi1(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
- %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
@@ -28,10 +28,10 @@ ENDIF:
; Make sure this program doesn't crash
; CHECK-LABEL: {{^}}phi2:
-define amdgpu_ps void @phi2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
+define amdgpu_ps void @phi2(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
main_body:
- %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
- %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 36)
@@ -47,10 +47,10 @@ main_body:
%tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 84)
%tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 88)
%tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 92)
- %tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
- %tmp37 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp36, !tbaa !0
- %tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
- %tmp39 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp38, !tbaa !0
+ %tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %arg2, i32 0
+ %tmp37 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp36, !tbaa !0
+ %tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg1, i32 0
+ %tmp39 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp38, !tbaa !0
%i.i = extractelement <2 x i32> %arg5, i32 0
%j.i = extractelement <2 x i32> %arg5, i32 1
%i.f.i = bitcast i32 %i.i to float
@@ -173,10 +173,10 @@ ENDIF24:
; We just want to make sure the program doesn't crash
; CHECK-LABEL: {{^}}loop:
-define amdgpu_ps void @loop(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @loop(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
- %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 4)
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 8)
@@ -226,15 +226,15 @@ ENDIF:
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
; CHECK: exp
; CHECK: s_endpgm
-define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
- %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
- %tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
+ %tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 16)
- %tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
- %tmp24 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp23, !tbaa !0
- %tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
- %tmp26 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp25, !tbaa !0
+ %tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
+ %tmp24 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp23, !tbaa !0
+ %tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
+ %tmp26 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp25, !tbaa !0
%tmp27 = fcmp oeq float %tmp22, 0.000000e+00
%tmp26.bc = bitcast <4 x i32> %tmp26 to <4 x i32>
br i1 %tmp27, label %if, label %else
@@ -290,7 +290,7 @@ endif:
; This test is just checking that we don't crash / assertion fail.
; CHECK-LABEL: {{^}}copy2:
; CHECK: s_endpgm
-define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
br label %LOOP68
@@ -326,15 +326,15 @@ ENDIF69:
; [[END]]:
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
; CHECK: s_endpgm
-define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
+define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <4 x i32>] addrspace(4)* byval %arg2, [32 x <8 x i32>] addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
- %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i32 0, i32 0
- %tmp22 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !3
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i32 0, i32 0
+ %tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !3
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp22, i32 16)
- %tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
- %tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !3
- %tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
- %tmp28 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp27, !tbaa !3
+ %tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(4)* %arg3, i32 0, i32 0
+ %tmp26 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp25, !tbaa !3
+ %tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(4)* %arg2, i32 0, i32 0
+ %tmp28 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp27, !tbaa !3
%i.i = extractelement <2 x i32> %arg7, i32 0
%j.i = extractelement <2 x i32> %arg7, i32 1
%i.f.i = bitcast i32 %i.i to float
@@ -382,11 +382,11 @@ bb71:
; Check the resource descriptor is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_srsrc_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
-define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
+define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(4)* byval %arg) #0 {
bb:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
- %tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
- %tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
+ %tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
+ %tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp10 = extractelement <4 x float> %tmp, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp10)
@@ -397,11 +397,11 @@ bb:
; Check the sampler is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_ssamp_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
-define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #0 {
+define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(4)* byval %arg) #0 {
bb:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
- %tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
- %tmp8 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp7, align 16, !tbaa !0
+ %tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
+ %tmp8 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp7, align 16, !tbaa !0
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp10 = extractelement <4 x float> %tmp, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll Tue Feb 13 10:00:25 2018
@@ -6,15 +6,15 @@
; GCN-LABEL: {{^}}main:
; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
-define amdgpu_ps void @main(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @main(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
- %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
- %tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
- %tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0
- %tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
- %tmp25 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp24, !tbaa !0
+ %tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %arg2, i32 0
+ %tmp23 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp22, !tbaa !0
+ %tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg1, i32 0
+ %tmp25 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp24, !tbaa !0
%i.i = extractelement <2 x i32> %arg5, i32 0
%j.i = extractelement <2 x i32> %arg5, i32 1
%i.f.i = bitcast i32 %i.i to float
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll Tue Feb 13 10:00:25 2018
@@ -16,12 +16,12 @@
; CHECK: s_waitcnt vmcnt(0)
; CHECK: exp
; CHECK: s_endpgm
-define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
+define amdgpu_ps void @main([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
main_body:
- %tmp = bitcast [34 x <8 x i32>] addrspace(2)* %arg3 to <32 x i8> addrspace(2)*
- %tmp22 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp, align 32, !tbaa !0
- %tmp23 = bitcast [17 x <4 x i32>] addrspace(2)* %arg2 to <16 x i8> addrspace(2)*
- %tmp24 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp23, align 16, !tbaa !0
+ %tmp = bitcast [34 x <8 x i32>] addrspace(4)* %arg3 to <32 x i8> addrspace(4)*
+ %tmp22 = load <32 x i8>, <32 x i8> addrspace(4)* %tmp, align 32, !tbaa !0
+ %tmp23 = bitcast [17 x <4 x i32>] addrspace(4)* %arg2 to <16 x i8> addrspace(4)*
+ %tmp24 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp23, align 16, !tbaa !0
%i.i = extractelement <2 x i32> %arg11, i32 0
%j.i = extractelement <2 x i32> %arg11, i32 1
%i.f.i = bitcast i32 %i.i to float
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll Tue Feb 13 10:00:25 2018
@@ -24,10 +24,10 @@
; GCN: s_endpgm
; TOVGPR: ScratchSize: 0{{$}}
-define amdgpu_ps void @main([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
+define amdgpu_ps void @main([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
main_body:
- %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
- %tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
+ %tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 96)
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 100)
%tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 104)
@@ -66,39 +66,39 @@ main_body:
%tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 372)
%tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 376)
%tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 384)
- %tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
- %tmp61 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp60, !tbaa !0
- %tmp62 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
- %tmp63 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp62, !tbaa !0
+ %tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
+ %tmp61 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp60, !tbaa !0
+ %tmp62 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
+ %tmp63 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp62, !tbaa !0
%tmp63.bc = bitcast <4 x i32> %tmp63 to <4 x i32>
- %tmp64 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
- %tmp65 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp64, !tbaa !0
- %tmp66 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 1
- %tmp67 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp66, !tbaa !0
- %tmp68 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
- %tmp69 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp68, !tbaa !0
- %tmp70 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 2
- %tmp71 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp70, !tbaa !0
- %tmp72 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
- %tmp73 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp72, !tbaa !0
- %tmp74 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 3
- %tmp75 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp74, !tbaa !0
- %tmp76 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
- %tmp77 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp76, !tbaa !0
- %tmp78 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 4
- %tmp79 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp78, !tbaa !0
- %tmp80 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
- %tmp81 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp80, !tbaa !0
- %tmp82 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 5
- %tmp83 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp82, !tbaa !0
- %tmp84 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
- %tmp85 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp84, !tbaa !0
- %tmp86 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 6
- %tmp87 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp86, !tbaa !0
- %tmp88 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
- %tmp89 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp88, !tbaa !0
- %tmp90 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 7
- %tmp91 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp90, !tbaa !0
+ %tmp64 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 1
+ %tmp65 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp64, !tbaa !0
+ %tmp66 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 1
+ %tmp67 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp66, !tbaa !0
+ %tmp68 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 2
+ %tmp69 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp68, !tbaa !0
+ %tmp70 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 2
+ %tmp71 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp70, !tbaa !0
+ %tmp72 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 3
+ %tmp73 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp72, !tbaa !0
+ %tmp74 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 3
+ %tmp75 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp74, !tbaa !0
+ %tmp76 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 4
+ %tmp77 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp76, !tbaa !0
+ %tmp78 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 4
+ %tmp79 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp78, !tbaa !0
+ %tmp80 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 5
+ %tmp81 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp80, !tbaa !0
+ %tmp82 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 5
+ %tmp83 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp82, !tbaa !0
+ %tmp84 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 6
+ %tmp85 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp84, !tbaa !0
+ %tmp86 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 6
+ %tmp87 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp86, !tbaa !0
+ %tmp88 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 7
+ %tmp89 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp88, !tbaa !0
+ %tmp90 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 7
+ %tmp91 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp90, !tbaa !0
%i.i = extractelement <2 x i32> %arg6, i32 0
%j.i = extractelement <2 x i32> %arg6, i32 1
%i.f.i = bitcast i32 %i.i to float
@@ -778,10 +778,10 @@ ENDIF66:
; GCN-LABEL: {{^}}main1:
; GCN: s_endpgm
; TOVGPR: ScratchSize: 0{{$}}
-define amdgpu_ps void @main1([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @main1([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
main_body:
- %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
- %tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
+ %tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 0)
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 4)
%tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 8)
@@ -885,42 +885,42 @@ main_body:
%tmp122 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 716)
%tmp123 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 864)
%tmp124 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 868)
- %tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
- %tmp126 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp125, !tbaa !0
- %tmp127 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
- %tmp128 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp127, !tbaa !0
- %tmp129 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
- %tmp130 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp129, !tbaa !0
- %tmp131 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 1
- %tmp132 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp131, !tbaa !0
- %tmp133 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
- %tmp134 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp133, !tbaa !0
- %tmp135 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 2
- %tmp136 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp135, !tbaa !0
- %tmp137 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
- %tmp138 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp137, !tbaa !0
- %tmp139 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 3
- %tmp140 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp139, !tbaa !0
- %tmp141 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
- %tmp142 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp141, !tbaa !0
- %tmp143 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 4
- %tmp144 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp143, !tbaa !0
- %tmp145 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
- %tmp146 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp145, !tbaa !0
- %tmp147 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 5
- %tmp148 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp147, !tbaa !0
- %tmp149 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
- %tmp150 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp149, !tbaa !0
- %tmp151 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 6
- %tmp152 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp151, !tbaa !0
- %tmp153 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
- %tmp154 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp153, !tbaa !0
- %tmp155 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 7
- %tmp156 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp155, !tbaa !0
- %tmp157 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 8
- %tmp158 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp157, !tbaa !0
- %tmp159 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 8
- %tmp160 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp159, !tbaa !0
+ %tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
+ %tmp126 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp125, !tbaa !0
+ %tmp127 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
+ %tmp128 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp127, !tbaa !0
+ %tmp129 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 1
+ %tmp130 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp129, !tbaa !0
+ %tmp131 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 1
+ %tmp132 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp131, !tbaa !0
+ %tmp133 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 2
+ %tmp134 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp133, !tbaa !0
+ %tmp135 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 2
+ %tmp136 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp135, !tbaa !0
+ %tmp137 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 3
+ %tmp138 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp137, !tbaa !0
+ %tmp139 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 3
+ %tmp140 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp139, !tbaa !0
+ %tmp141 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 4
+ %tmp142 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp141, !tbaa !0
+ %tmp143 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 4
+ %tmp144 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp143, !tbaa !0
+ %tmp145 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 5
+ %tmp146 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp145, !tbaa !0
+ %tmp147 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 5
+ %tmp148 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp147, !tbaa !0
+ %tmp149 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 6
+ %tmp150 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp149, !tbaa !0
+ %tmp151 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 6
+ %tmp152 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp151, !tbaa !0
+ %tmp153 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 7
+ %tmp154 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp153, !tbaa !0
+ %tmp155 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 7
+ %tmp156 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp155, !tbaa !0
+ %tmp157 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 8
+ %tmp158 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp157, !tbaa !0
+ %tmp159 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 8
+ %tmp160 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp159, !tbaa !0
%tmp161 = fcmp ugt float %arg17, 0.000000e+00
%tmp162 = select i1 %tmp161, float 1.000000e+00, float 0.000000e+00
%i.i = extractelement <2 x i32> %arg6, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll Tue Feb 13 10:00:25 2018
@@ -8,7 +8,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
@stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4
-@stored_constant_ptr = addrspace(3) global i32 addrspace(2)* undef, align 8
+@stored_constant_ptr = addrspace(3) global i32 addrspace(4)* undef, align 8
@stored_global_ptr = addrspace(3) global i32 addrspace(1)* undef, align 8
; GCN-LABEL: {{^}}reorder_local_load_global_store_local_load:
@@ -100,14 +100,14 @@ define amdgpu_kernel void @no_reorder_ba
; CI: buffer_store_dword
; GFX9: global_store_dword
define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+ %ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @stored_constant_ptr, align 8
- %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 3
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 3
- %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
+ %tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4
store i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -129,14 +129,14 @@ define amdgpu_kernel void @reorder_const
; CI: buffer_store_dword
; GFX9: global_store_dword
define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
- %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+ %ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @stored_constant_ptr, align 8
- %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 3
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 3
- %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
+ %tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4
store i32 99, i32 addrspace(3)* %lptr, align 4
- %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -151,13 +151,13 @@ define amdgpu_kernel void @reorder_const
; GCN: ds_write_b32
; CI: buffer_store_dword
; GFX9: global_store_dword
-define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(2)* %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
+define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(4)* %ptr0) #0 {
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 2
- %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
+ %tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4
store i32 99, i32 addrspace(3)* %lptr, align 4
- %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll Tue Feb 13 10:00:25 2018
@@ -12,10 +12,10 @@
; GCN: buffer_store_dword
; GCN: [[EXIT]]:
; GCN: s_endpgm
-define amdgpu_kernel void @vccz_workaround(i32 addrspace(2)* %in, i32 addrspace(1)* %out, float %cond) {
+define amdgpu_kernel void @vccz_workaround(i32 addrspace(4)* %in, i32 addrspace(1)* %out, float %cond) {
entry:
%cnd = fcmp oeq float 0.0, %cond
- %sgpr = load volatile i32, i32 addrspace(2)* %in
+ %sgpr = load volatile i32, i32 addrspace(4)* %in
br i1 %cnd, label %if, label %endif
if:
Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd.ll Tue Feb 13 10:00:25 2018
@@ -7,10 +7,10 @@
; GCN-LABEL: {{^}}smrd0:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
-define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
- %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
- %tmp1 = load i32, i32 addrspace(2)* %tmp
+ %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 1
+ %tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -19,10 +19,10 @@ entry:
; GCN-LABEL: {{^}}smrd1:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
-define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
- %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
- %tmp1 = load i32, i32 addrspace(2)* %tmp
+ %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 255
+ %tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -34,10 +34,10 @@ entry:
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
-define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
- %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
- %tmp1 = load i32, i32 addrspace(2)* %tmp
+ %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 256
+ %tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -49,10 +49,10 @@ entry:
; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
; TODO: Add VI checks
; GCN: s_endpgm
-define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
- %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296
- %tmp1 = load i32, i32 addrspace(2)* %tmp
+ %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 4294967296
+ %tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -63,10 +63,10 @@ entry:
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
-define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
- %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
- %tmp1 = load i32, i32 addrspace(2)* %tmp
+ %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 262143
+ %tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -77,10 +77,10 @@ entry:
; SIVIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
-define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
- %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
- %tmp1 = load i32, i32 addrspace(2)* %tmp
+ %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 262144
+ %tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -106,10 +106,10 @@ main_body:
; GCN-LABEL: {{^}}smrd_load_const0:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
-define amdgpu_ps void @smrd_load_const0(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @smrd_load_const0(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
- %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
@@ -120,10 +120,10 @@ main_body:
; GCN-LABEL: {{^}}smrd_load_const1:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
-define amdgpu_ps void @smrd_load_const1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @smrd_load_const1(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
- %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1020)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
@@ -137,10 +137,10 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
-define amdgpu_ps void @smrd_load_const2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @smrd_load_const2(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
- %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1024)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
@@ -152,10 +152,10 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
-define amdgpu_ps void @smrd_load_const3(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @smrd_load_const3(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
- %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048572)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
@@ -167,10 +167,10 @@ main_body:
; SIVIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
-define amdgpu_ps void @smrd_load_const4(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @smrd_load_const4(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
- %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048576)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
@@ -257,9 +257,9 @@ main_body:
; GCN-LABEL: {{^}}smrd_sgpr_descriptor_promoted
; GCN: v_readfirstlane
-define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), i32) #0 {
+define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), i32) #0 {
main_body:
- %descptr = bitcast [0 x i8] addrspace(2)* %0 to <4 x i32> addrspace(2)*, !amdgpu.uniform !0
+ %descptr = bitcast [0 x i8] addrspace(4)* %0 to <4 x i32> addrspace(4)*, !amdgpu.uniform !0
br label %.outer_loop_header
ret_block: ; preds = %.outer, %.label22, %main_body
@@ -275,7 +275,7 @@ ret_block:
br i1 %inner_br1, label %.inner_loop_body, label %ret_block
.inner_loop_body:
- %descriptor = load <4 x i32>, <4 x i32> addrspace(2)* %descptr, align 16, !invariant.load !0
+ %descriptor = load <4 x i32>, <4 x i32> addrspace(4)* %descptr, align 16, !invariant.load !0
%load1result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 0)
%inner_br2 = icmp uge i32 %1, 10
br i1 %inner_br2, label %.inner_loop_header, label %.outer_loop_body
Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll Tue Feb 13 10:00:25 2018
@@ -87,7 +87,7 @@ endif:
; GCN-NOT: v_readlane_b32 m0
; GCN-NOT: s_buffer_store_dword m0
; GCN-NOT: s_buffer_load_dword m0
-define amdgpu_ps void @spill_kill_m0_lds(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %m0) #0 {
+define amdgpu_ps void @spill_kill_m0_lds(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %m0) #0 {
main_body:
%tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0)
%cmp = fcmp ueq float 0.000000e+00, %tmp
@@ -191,7 +191,7 @@ endif:
; TOSMEM: s_endpgm
define amdgpu_kernel void @restore_m0_lds(i32 %arg) {
%m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={M0}"() #0
- %sval = load volatile i64, i64 addrspace(2)* undef
+ %sval = load volatile i64, i64 addrspace(4)* undef
%cmp = icmp eq i32 %arg, 0
br i1 %cmp, label %ret, label %bb
Modified: llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll Tue Feb 13 10:00:25 2018
@@ -6,7 +6,7 @@
; GCN-LABEL: {{^}}split_smrd_add_worklist:
; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
-define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
+define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(4)* byval %arg) #0 {
bb:
%tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 96)
%tmp1 = bitcast float %tmp to i32
@@ -19,8 +19,8 @@ bb3:
%tmp4 = bitcast float %tmp to i32
%tmp5 = add i32 %tmp4, 4
%tmp6 = sext i32 %tmp5 to i64
- %tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i64 0, i64 %tmp6
- %tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
+ %tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i64 0, i64 %tmp6
+ %tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
%tmp9 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float)>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp10 = extractelement <4 x float> %tmp9, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-global.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-global.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-global.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-global.ll Tue Feb 13 10:00:25 2018
@@ -394,11 +394,11 @@ entry:
; SIVI: buffer_store_dwordx2
; GFX9: global_store_dwordx2
-define amdgpu_kernel void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
+define amdgpu_kernel void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* nocapture %mem) #0 {
entry:
- %0 = load i32, i32 addrspace(2)* %mem, align 4
- %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
- %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
+ %0 = load i32, i32 addrspace(4)* %mem, align 4
+ %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(4)* %mem, i64 1
+ %1 = load i32, i32 addrspace(4)* %arrayidx1.i, align 4
store i32 %0, i32 addrspace(1)* %out, align 4
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-private.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-private.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-private.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-private.ll Tue Feb 13 10:00:25 2018
@@ -689,11 +689,11 @@ entry:
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @vecload2(i32 addrspace(5)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
+define amdgpu_kernel void @vecload2(i32 addrspace(5)* nocapture %out, i32 addrspace(4)* nocapture %mem) #0 {
entry:
- %0 = load i32, i32 addrspace(2)* %mem, align 4
- %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
- %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
+ %0 = load i32, i32 addrspace(4)* %mem, align 4
+ %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(4)* %mem, i64 1
+ %1 = load i32, i32 addrspace(4)* %arrayidx1.i, align 4
store i32 %0, i32 addrspace(5)* %out, align 4
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(5)* %out, i64 1
store i32 %1, i32 addrspace(5)* %arrayidx1, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll Tue Feb 13 10:00:25 2018
@@ -27,9 +27,9 @@ define amdgpu_kernel void @v_test_sub_v2
; VI: s_sub_i32
; VI: s_sub_i32
-define amdgpu_kernel void @s_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0, <2 x i16> addrspace(2)* %in1) #1 {
- %a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
- %b = load <2 x i16>, <2 x i16> addrspace(2)* %in1
+define amdgpu_kernel void @s_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0, <2 x i16> addrspace(4)* %in1) #1 {
+ %a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
+ %b = load <2 x i16>, <2 x i16> addrspace(4)* %in1
%add = sub <2 x i16> %a, %b
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
ret void
@@ -38,8 +38,8 @@ define amdgpu_kernel void @s_test_sub_v2
; GCN-LABEL: {{^}}s_test_sub_self_v2i16:
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]]
; GCN: buffer_store_dword [[ZERO]]
-define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0) #1 {
- %a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
+define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0) #1 {
+ %a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
%add = sub <2 x i16> %a, %a
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll Tue Feb 13 10:00:25 2018
@@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
-declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #1
+declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
@@ -15,10 +15,10 @@ declare void @llvm.amdgcn.s.dcache.wb()
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
define amdgpu_kernel void @target_none() #0 {
- %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
- %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
- %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
- %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+ %kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+ %kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
+ %kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
+ %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
@@ -31,10 +31,10 @@ define amdgpu_kernel void @target_none()
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
define amdgpu_kernel void @target_tahiti() #1 {
- %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
- %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
- %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
- %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+ %kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+ %kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
+ %kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
+ %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
@@ -47,10 +47,10 @@ define amdgpu_kernel void @target_tahiti
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
; CHECK: s_dcache_inv_vol
define amdgpu_kernel void @target_bonaire() #3 {
- %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
- %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
- %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
- %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+ %kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+ %kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
+ %kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
+ %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
@@ -64,10 +64,10 @@ define amdgpu_kernel void @target_bonair
; CHECK: flat_store_dword
; CHECK: s_dcache_wb{{$}}
define amdgpu_kernel void @target_fiji() #4 {
- %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
- %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
- %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
- %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+ %kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+ %kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
+ %kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
+ %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
Modified: llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll Tue Feb 13 10:00:25 2018
@@ -418,8 +418,8 @@ define amdgpu_kernel void @local_store_i
; UNALIGNED: s_load_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
- %v = load i32, i32 addrspace(2)* %p, align 1
+define amdgpu_kernel void @constant_unaligned_load_i32(i32 addrspace(4)* %p, i32 addrspace(1)* %r) #0 {
+ %v = load i32, i32 addrspace(4)* %p, align 1
store i32 %v, i32 addrspace(1)* %r, align 4
ret void
}
@@ -430,8 +430,8 @@ define amdgpu_kernel void @constant_unal
; UNALIGNED: s_load_dword
; UNALIGNED: buffer_store_dword
-define amdgpu_kernel void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
- %v = load i32, i32 addrspace(2)* %p, align 2
+define amdgpu_kernel void @constant_align2_load_i32(i32 addrspace(4)* %p, i32 addrspace(1)* %r) #0 {
+ %v = load i32, i32 addrspace(4)* %p, align 2
store i32 %v, i32 addrspace(1)* %r, align 4
ret void
}
@@ -444,8 +444,8 @@ define amdgpu_kernel void @constant_alig
; UNALIGNED: s_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
-define amdgpu_kernel void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
- %v = load i64, i64 addrspace(2)* %p, align 2
+define amdgpu_kernel void @constant_align2_load_i64(i64 addrspace(4)* %p, i64 addrspace(1)* %r) #0 {
+ %v = load i64, i64 addrspace(4)* %p, align 2
store i64 %v, i64 addrspace(1)* %r, align 4
ret void
}
@@ -453,8 +453,8 @@ define amdgpu_kernel void @constant_alig
; SI-LABEL: {{^}}constant_align4_load_i64:
; SI: s_load_dwordx2
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
- %v = load i64, i64 addrspace(2)* %p, align 4
+define amdgpu_kernel void @constant_align4_load_i64(i64 addrspace(4)* %p, i64 addrspace(1)* %r) #0 {
+ %v = load i64, i64 addrspace(4)* %p, align 4
store i64 %v, i64 addrspace(1)* %r, align 4
ret void
}
@@ -462,8 +462,8 @@ define amdgpu_kernel void @constant_alig
; SI-LABEL: {{^}}constant_align4_load_v4i32:
; SI: s_load_dwordx4
; SI: buffer_store_dwordx4
-define amdgpu_kernel void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
- %v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 4
+define amdgpu_kernel void @constant_align4_load_v4i32(<4 x i32> addrspace(4)* %p, <4 x i32> addrspace(1)* %r) #0 {
+ %v = load <4 x i32>, <4 x i32> addrspace(4)* %p, align 4
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
ret void
}
@@ -482,8 +482,8 @@ define amdgpu_kernel void @constant_alig
; UNALIGNED: buffer_load_dwordx2
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)* %p, <2 x i32> addrspace(1)* %r) #0 {
- %v = load <2 x i32>, <2 x i32> addrspace(2)* %p, align 1
+define amdgpu_kernel void @constant_unaligned_load_v2i32(<2 x i32> addrspace(4)* %p, <2 x i32> addrspace(1)* %r) #0 {
+ %v = load <2 x i32>, <2 x i32> addrspace(4)* %p, align 1
store <2 x i32> %v, <2 x i32> addrspace(1)* %r, align 4
ret void
}
@@ -512,8 +512,8 @@ define amdgpu_kernel void @constant_unal
; UNALIGNED: buffer_load_dwordx4
; SI: buffer_store_dwordx4
-define amdgpu_kernel void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
- %v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 1
+define amdgpu_kernel void @constant_unaligned_load_v4i32(<4 x i32> addrspace(4)* %p, <4 x i32> addrspace(1)* %r) #0 {
+ %v = load <4 x i32>, <4 x i32> addrspace(4)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
ret void
}
@@ -521,8 +521,8 @@ define amdgpu_kernel void @constant_unal
; SI-LABEL: {{^}}constant_align4_load_i8:
; SI: s_load_dword
; SI: buffer_store_byte
-define amdgpu_kernel void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
- %v = load i8, i8 addrspace(2)* %p, align 4
+define amdgpu_kernel void @constant_align4_load_i8(i8 addrspace(4)* %p, i8 addrspace(1)* %r) #0 {
+ %v = load i8, i8 addrspace(4)* %p, align 4
store i8 %v, i8 addrspace(1)* %r, align 4
ret void
}
@@ -530,8 +530,8 @@ define amdgpu_kernel void @constant_alig
; SI-LABEL: {{^}}constant_align2_load_i8:
; SI: buffer_load_ubyte
; SI: buffer_store_byte
-define amdgpu_kernel void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
- %v = load i8, i8 addrspace(2)* %p, align 2
+define amdgpu_kernel void @constant_align2_load_i8(i8 addrspace(4)* %p, i8 addrspace(1)* %r) #0 {
+ %v = load i8, i8 addrspace(4)* %p, align 2
store i8 %v, i8 addrspace(1)* %r, align 2
ret void
}
@@ -541,10 +541,10 @@ define amdgpu_kernel void @constant_alig
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[LO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-define amdgpu_kernel void @constant_align4_merge_load_2_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
- %gep0 = getelementptr i32, i32 addrspace(2)* %p, i64 1
- %v0 = load i32, i32 addrspace(2)* %p, align 4
- %v1 = load i32, i32 addrspace(2)* %gep0, align 4
+define amdgpu_kernel void @constant_align4_merge_load_2_i32(i32 addrspace(4)* %p, i32 addrspace(1)* %r) #0 {
+ %gep0 = getelementptr i32, i32 addrspace(4)* %p, i64 1
+ %v0 = load i32, i32 addrspace(4)* %p, align 4
+ %v1 = load i32, i32 addrspace(4)* %gep0, align 4
%gep1 = getelementptr i32, i32 addrspace(1)* %r, i64 1
store i32 %v0, i32 addrspace(1)* %r, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/uniform-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-crash.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-crash.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-crash.ll Tue Feb 13 10:00:25 2018
@@ -35,7 +35,7 @@ bb2:
br label %bb3
bb3: ; preds = %bb3, %bb2
- %val = load volatile i32, i32 addrspace(2)* undef
+ %val = load volatile i32, i32 addrspace(4)* undef
%tmp4 = icmp eq i32 %val, %arg1
br i1 %tmp4, label %bb5, label %bb3
Modified: llvm/trunk/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll Tue Feb 13 10:00:25 2018
@@ -36,11 +36,11 @@ define amdgpu_kernel void @vtx_fetch32_i
; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #2 ; encoding: [0x40,0x02,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #2 ; encoding: [0x40,0x02,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
-@t = internal addrspace(2) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+@t = internal addrspace(4) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]
define amdgpu_kernel void @vtx_fetch32_id2(i32 addrspace(1)* %out, i32 %in) {
- %a = getelementptr inbounds [4 x i32], [4 x i32] addrspace(2)* @t, i32 0, i32 %in
- %v = load i32, i32 addrspace(2)* %a
+ %a = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @t, i32 0, i32 %in
+ %v = load i32, i32 addrspace(4)* %a
store i32 %v, i32 addrspace(1)* %out
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll Tue Feb 13 10:00:25 2018
@@ -27,15 +27,15 @@
; GCN: NumVgprs: 256
; GCN: ScratchSize: 1536
-define amdgpu_vs void @main([9 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <4 x i32>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
+define amdgpu_vs void @main([9 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, [16 x <4 x i32>] addrspace(4)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
bb:
- %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i64 0, i64 0
- %tmp11 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, align 16, !tbaa !0
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i64 0, i64 0
+ %tmp11 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, align 16, !tbaa !0
%tmp12 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 0)
%tmp13 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 16)
%tmp14 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 32)
- %tmp15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg4, i64 0, i64 0
- %tmp16 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp15, align 16, !tbaa !0
+ %tmp15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(4)* %arg4, i64 0, i64 0
+ %tmp16 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp15, align 16, !tbaa !0
%tmp17 = add i32 %arg5, %arg7
%tmp16.cast = bitcast <4 x i32> %tmp16 to <4 x i32>
%tmp18 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp16.cast, i32 %tmp17, i32 0, i1 false, i1 false)
Modified: llvm/trunk/test/CodeGen/AMDGPU/wait.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wait.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wait.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wait.ll Tue Feb 13 10:00:25 2018
@@ -11,19 +11,19 @@
; DEFAULT: exp
; DEFAULT: s_waitcnt lgkmcnt(0)
; DEFAULT: s_endpgm
-define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
+define amdgpu_vs void @main(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, <16 x i8> addrspace(4)* inreg %arg3, <16 x i8> addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(4)* inreg %constptr) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
- %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 0
+ %tmp10 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, !tbaa !0
%tmp10.cast = bitcast <16 x i8> %tmp10 to <4 x i32>
%tmp11 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp10.cast, i32 %arg6, i32 0, i1 false, i1 false)
%tmp12 = extractelement <4 x float> %tmp11, i32 0
%tmp13 = extractelement <4 x float> %tmp11, i32 1
call void @llvm.amdgcn.s.barrier() #1
%tmp14 = extractelement <4 x float> %tmp11, i32 2
- %tmp15 = load float, float addrspace(2)* %constptr, align 4
- %tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 1
- %tmp17 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp16, !tbaa !0
+ %tmp15 = load float, float addrspace(4)* %constptr, align 4
+ %tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 1
+ %tmp17 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp16, !tbaa !0
%tmp17.cast = bitcast <16 x i8> %tmp17 to <4 x i32>
%tmp18 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp17.cast, i32 %arg6, i32 0, i1 false, i1 false)
%tmp19 = extractelement <4 x float> %tmp18, i32 0
@@ -46,10 +46,10 @@ main_body:
; ILPMAX: exp pos0
; ILPMAX-NEXT: exp param0
; ILPMAX: s_endpgm
-define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
+define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, [16 x <16 x i8>] addrspace(4)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
main_body:
- %tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0
- %tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
+ %tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(4)* %arg4, i64 0, i64 0
+ %tmp11 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, align 16, !tbaa !0
%tmp12 = add i32 %arg5, %arg7
%tmp11.cast = bitcast <16 x i8> %tmp11 to <4 x i32>
%tmp13 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp11.cast, i32 %tmp12, i32 0, i1 false, i1 false)
@@ -57,8 +57,8 @@ main_body:
%tmp15 = extractelement <4 x float> %tmp13, i32 1
%tmp16 = extractelement <4 x float> %tmp13, i32 2
%tmp17 = extractelement <4 x float> %tmp13, i32 3
- %tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 1
- %tmp19 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp18, align 16, !tbaa !0
+ %tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(4)* %arg4, i64 0, i64 1
+ %tmp19 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp18, align 16, !tbaa !0
%tmp20 = add i32 %arg5, %arg7
%tmp19.cast = bitcast <16 x i8> %tmp19 to <4 x i32>
%tmp21 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp19.cast, i32 %tmp20, i32 0, i1 false, i1 false)
Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-looptest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-looptest.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-looptest.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-looptest.ll Tue Feb 13 10:00:25 2018
@@ -22,19 +22,19 @@ bb:
br label %bb18
bb1: ; preds = %bb18
- %tmp = tail call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+ %tmp = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp3 = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %tmp4 = getelementptr inbounds i8, i8 addrspace(2)* %tmp, i64 4
- %tmp5 = bitcast i8 addrspace(2)* %tmp4 to i16 addrspace(2)*
- %tmp6 = load i16, i16 addrspace(2)* %tmp5, align 4
+ %tmp4 = getelementptr inbounds i8, i8 addrspace(4)* %tmp, i64 4
+ %tmp5 = bitcast i8 addrspace(4)* %tmp4 to i16 addrspace(4)*
+ %tmp6 = load i16, i16 addrspace(4)* %tmp5, align 4
%tmp7 = zext i16 %tmp6 to i32
%tmp8 = mul i32 %tmp3, %tmp7
%tmp9 = add i32 %tmp8, %tmp2
- %tmp10 = tail call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %tmp10 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%tmp11 = zext i32 %tmp9 to i64
- %tmp12 = bitcast i8 addrspace(2)* %tmp10 to i64 addrspace(2)*
- %tmp13 = load i64, i64 addrspace(2)* %tmp12, align 8
+ %tmp12 = bitcast i8 addrspace(4)* %tmp10 to i64 addrspace(4)*
+ %tmp13 = load i64, i64 addrspace(4)* %tmp12, align 8
%tmp14 = add i64 %tmp13, %tmp11
%tmp15 = zext i1 %tmp99 to i32
%tmp16 = and i64 %tmp14, 4294967295
@@ -131,7 +131,7 @@ bb18:
}
; Function Attrs: nounwind readnone speculatable
-declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
+declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.workitem.id.x() #1
@@ -140,7 +140,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workgroup.id.x() #1
; Function Attrs: nounwind readnone speculatable
-declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #1
+declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
attributes #0 = { "target-cpu"="fiji" "target-features"="-flat-for-global" }
attributes #1 = { nounwind readnone speculatable }
Modified: llvm/trunk/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll Tue Feb 13 10:00:25 2018
@@ -1,12 +1,12 @@
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare < %s | FileCheck -check-prefix=OPT %s
-declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
; OPT-LABEL: @constant_load_i1
; OPT: load i1
; OPT-NEXT: store i1
-define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
- %val = load i1, i1 addrspace(2)* %in
+define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
+ %val = load i1, i1 addrspace(4)* %in
store i1 %val, i1 addrspace(1)* %out
ret void
}
@@ -14,8 +14,8 @@ define amdgpu_kernel void @constant_load
; OPT-LABEL: @constant_load_i1_align2
; OPT: load i1
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
- %val = load i1, i1 addrspace(2)* %in, align 2
+define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
+ %val = load i1, i1 addrspace(4)* %in, align 2
store i1 %val, i1 addrspace(1)* %out, align 2
ret void
}
@@ -25,8 +25,8 @@ define amdgpu_kernel void @constant_load
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
- %val = load i1, i1 addrspace(2)* %in, align 4
+define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
+ %val = load i1, i1 addrspace(4)* %in, align 4
store i1 %val, i1 addrspace(1)* %out, align 4
ret void
}
@@ -34,8 +34,8 @@ define amdgpu_kernel void @constant_load
; OPT-LABEL: @constant_load_i8
; OPT: load i8
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
- %val = load i8, i8 addrspace(2)* %in
+define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
+ %val = load i8, i8 addrspace(4)* %in
store i8 %val, i8 addrspace(1)* %out
ret void
}
@@ -43,8 +43,8 @@ define amdgpu_kernel void @constant_load
; OPT-LABEL: @constant_load_i8_align2
; OPT: load i8
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
- %val = load i8, i8 addrspace(2)* %in, align 2
+define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
+ %val = load i8, i8 addrspace(4)* %in, align 2
store i8 %val, i8 addrspace(1)* %out, align 2
ret void
}
@@ -54,8 +54,8 @@ define amdgpu_kernel void @constant_load
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_i8align4(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
- %val = load i8, i8 addrspace(2)* %in, align 4
+define amdgpu_kernel void @constant_load_i8align4(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
+ %val = load i8, i8 addrspace(4)* %in, align 4
store i8 %val, i8 addrspace(1)* %out, align 4
ret void
}
@@ -64,8 +64,8 @@ define amdgpu_kernel void @constant_load
; OPT-LABEL: @constant_load_v2i8
; OPT: load <2 x i8>
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
- %ld = load <2 x i8>, <2 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
+ %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
ret void
}
@@ -76,32 +76,32 @@ define amdgpu_kernel void @constant_load
; OPT-NEXT: trunc
; OPT-NEXT: bitcast
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_v2i8_align4(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
- %ld = load <2 x i8>, <2 x i8> addrspace(2)* %in, align 4
+define amdgpu_kernel void @constant_load_v2i8_align4(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
+ %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in, align 4
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out, align 4
ret void
}
; OPT-LABEL: @constant_load_v3i8
; OPT: bitcast <3 x i8>
-; OPT-NEXT: load i32, i32 addrspace(2)
+; OPT-NEXT: load i32, i32 addrspace(4)
; OPT-NEXT: trunc i32
; OPT-NEXT: bitcast i24
; OPT-NEXT: store <3 x i8>
-define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
- %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
+ %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
ret void
}
; OPT-LABEL: @constant_load_v3i8_align4
; OPT: bitcast <3 x i8>
-; OPT-NEXT: load i32, i32 addrspace(2)
+; OPT-NEXT: load i32, i32 addrspace(4)
; OPT-NEXT: trunc i32
; OPT-NEXT: bitcast i24
; OPT-NEXT: store <3 x i8>
-define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
- %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in, align 4
+define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
+ %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in, align 4
store <3 x i8> %ld, <3 x i8> addrspace(1)* %out, align 4
ret void
}
@@ -110,8 +110,8 @@ define amdgpu_kernel void @constant_load
; OPT: load i16
; OPT: sext
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
- %ld = load i16, i16 addrspace(2)* %in
+define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %ld = load i16, i16 addrspace(4)* %in
%ext = sext i16 %ld to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -123,8 +123,8 @@ define amdgpu_kernel void @constant_load
; OPT-NEXT: trunc
; OPT-NEXT: sext
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
- %ld = load i16, i16 addrspace(2)* %in, align 4
+define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %ld = load i16, i16 addrspace(4)* %in, align 4
%ext = sext i16 %ld to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -133,8 +133,8 @@ define amdgpu_kernel void @constant_load
; OPT-LABEL: @constant_load_f16
; OPT: load half
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrspace(2)* %in) #0 {
- %ld = load half, half addrspace(2)* %in
+define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrspace(4)* %in) #0 {
+ %ld = load half, half addrspace(4)* %in
store half %ld, half addrspace(1)* %out
ret void
}
@@ -142,8 +142,8 @@ define amdgpu_kernel void @constant_load
; OPT-LABEL: @constant_load_v2f16
; OPT: load <2 x half>
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(2)* %in) #0 {
- %ld = load <2 x half>, <2 x half> addrspace(2)* %in
+define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %in) #0 {
+ %ld = load <2 x half>, <2 x half> addrspace(4)* %in
store <2 x half> %ld, <2 x half> addrspace(1)* %out
ret void
}
@@ -151,8 +151,8 @@ define amdgpu_kernel void @constant_load
; OPT-LABEL: @load_volatile
; OPT: load volatile i16
; OPT-NEXT: store
-define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
- %a = load volatile i16, i16 addrspace(2)* %in
+define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
+ %a = load volatile i16, i16 addrspace(4)* %in
store i16 %a, i16 addrspace(1)* %out
ret void
}
@@ -160,8 +160,8 @@ define amdgpu_kernel void @load_volatile
; OPT-LABEL: @constant_load_v2i8_volatile
; OPT: load volatile <2 x i8>
; OPT-NEXT: store
-define amdgpu_kernel void @constant_load_v2i8_volatile(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
- %ld = load volatile <2 x i8>, <2 x i8> addrspace(2)* %in
+define amdgpu_kernel void @constant_load_v2i8_volatile(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
+ %ld = load volatile <2 x i8>, <2 x i8> addrspace(4)* %in
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
ret void
}
@@ -182,8 +182,8 @@ define amdgpu_kernel void @constant_load
; OPT-NEXT: zext
; OPT-NEXT: store
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
- %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
- %val = load i8, i8 addrspace(2)* %dispatch.ptr, align 4
+ %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+ %val = load i8, i8 addrspace(4)* %dispatch.ptr, align 4
%ld = zext i8 %val to i32
store i32 %ld, i32 addrspace(1)* %ptr
ret void
Modified: llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll?rev=325030&r1=325029&r2=325030&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll (original)
+++ llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll Tue Feb 13 10:00:25 2018
@@ -2,64 +2,64 @@
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-@array = internal addrspace(2) constant [4096 x [32 x float]] zeroinitializer, align 4
+@array = internal addrspace(4) constant [4096 x [32 x float]] zeroinitializer, align 4
; IR-LABEL: @sum_of_array(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1
-; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32
-; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 33
define amdgpu_kernel void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
%tmp = sext i32 %y to i64
%tmp1 = sext i32 %x to i64
- %tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp
- %tmp4 = load float, float addrspace(2)* %tmp2, align 4
+ %tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp1, i64 %tmp
+ %tmp4 = load float, float addrspace(4)* %tmp2, align 4
%tmp5 = fadd float %tmp4, 0.000000e+00
%tmp6 = add i32 %y, 1
%tmp7 = sext i32 %tmp6 to i64
- %tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp7
- %tmp10 = load float, float addrspace(2)* %tmp8, align 4
+ %tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp1, i64 %tmp7
+ %tmp10 = load float, float addrspace(4)* %tmp8, align 4
%tmp11 = fadd float %tmp5, %tmp10
%tmp12 = add i32 %x, 1
%tmp13 = sext i32 %tmp12 to i64
- %tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp
- %tmp16 = load float, float addrspace(2)* %tmp14, align 4
+ %tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp13, i64 %tmp
+ %tmp16 = load float, float addrspace(4)* %tmp14, align 4
%tmp17 = fadd float %tmp11, %tmp16
- %tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp7
- %tmp20 = load float, float addrspace(2)* %tmp18, align 4
+ %tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp13, i64 %tmp7
+ %tmp20 = load float, float addrspace(4)* %tmp18, align 4
%tmp21 = fadd float %tmp17, %tmp20
store float %tmp21, float addrspace(1)* %output, align 4
ret void
}
-@array2 = internal addrspace(2) constant [4096 x [4 x float]] zeroinitializer, align 4
+@array2 = internal addrspace(4) constant [4096 x [4 x float]] zeroinitializer, align 4
; Some of the indices go over the maximum mubuf offset, so don't split them.
; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 255
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 255
; IR: add i32 %x, 256
-; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
define amdgpu_kernel void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
%tmp = sext i32 %y to i64
%tmp1 = sext i32 %x to i64
- %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp
- %tmp4 = load float, float addrspace(2)* %tmp2, align 4
+ %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp1, i64 %tmp
+ %tmp4 = load float, float addrspace(4)* %tmp2, align 4
%tmp5 = fadd float %tmp4, 0.000000e+00
%tmp6 = add i32 %y, 255
%tmp7 = sext i32 %tmp6 to i64
- %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp7
- %tmp10 = load float, float addrspace(2)* %tmp8, align 4
+ %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp1, i64 %tmp7
+ %tmp10 = load float, float addrspace(4)* %tmp8, align 4
%tmp11 = fadd float %tmp5, %tmp10
%tmp12 = add i32 %x, 256
%tmp13 = sext i32 %tmp12 to i64
- %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp
- %tmp16 = load float, float addrspace(2)* %tmp14, align 4
+ %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp13, i64 %tmp
+ %tmp16 = load float, float addrspace(4)* %tmp14, align 4
%tmp17 = fadd float %tmp11, %tmp16
- %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp7
- %tmp20 = load float, float addrspace(2)* %tmp18, align 4
+ %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp13, i64 %tmp7
+ %tmp20 = load float, float addrspace(4)* %tmp18, align 4
%tmp21 = fadd float %tmp17, %tmp20
store float %tmp21, float addrspace(1)* %output, align 4
ret void
@@ -97,18 +97,18 @@ define amdgpu_kernel void @sum_of_lds_ar
; IR: getelementptr {{.*}} !amdgpu.uniform
; IR: getelementptr {{.*}} !amdgpu.uniform
; IR: getelementptr {{.*}} !amdgpu.uniform
-define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
main_body:
%22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
%23 = bitcast float %22 to i32
%24 = shl i32 %23, 1
- %25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(2)* %1, i32 0, i32 %24, !amdgpu.uniform !0
- %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !invariant.load !0
+ %25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(4)* %1, i32 0, i32 %24, !amdgpu.uniform !0
+ %26 = load <8 x i32>, <8 x i32> addrspace(4)* %25, align 32, !invariant.load !0
%27 = shl i32 %23, 2
%28 = or i32 %27, 3
- %29 = bitcast [0 x <8 x i32>] addrspace(2)* %1 to [0 x <4 x i32>] addrspace(2)*
- %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i32 0, i32 %28, !amdgpu.uniform !0
- %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !invariant.load !0
+ %29 = bitcast [0 x <8 x i32>] addrspace(4)* %1 to [0 x <4 x i32>] addrspace(4)*
+ %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(4)* %29, i32 0, i32 %28, !amdgpu.uniform !0
+ %31 = load <4 x i32>, <4 x i32> addrspace(4)* %30, align 16, !invariant.load !0
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1