[llvm] r265589 - AMDGPU: Add a shader calling convention
Nicolai Haehnle via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 6 12:40:23 PDT 2016
Author: nha
Date: Wed Apr 6 14:40:20 2016
New Revision: 265589
URL: http://llvm.org/viewvc/llvm-project?rev=265589&view=rev
Log:
AMDGPU: Add a shader calling convention
This makes it possible to distinguish between mesa shaders
and other kernels even in the presence of compute shaders.
Patch By: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Differential Revision: http://reviews.llvm.org/D18559
Added:
llvm/trunk/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll
Modified:
llvm/trunk/include/llvm/IR/CallingConv.h
llvm/trunk/lib/AsmParser/LLLexer.cpp
llvm/trunk/lib/AsmParser/LLParser.cpp
llvm/trunk/lib/AsmParser/LLToken.h
llvm/trunk/lib/IR/AsmWriter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SITypeRewriter.cpp
llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll
llvm/trunk/test/CodeGen/AMDGPU/bitcast.ll
llvm/trunk/test/CodeGen/AMDGPU/call_fs.ll
llvm/trunk/test/CodeGen/AMDGPU/cayman-loop-bug.ll
llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll
llvm/trunk/test/CodeGen/AMDGPU/complex-folding.ll
llvm/trunk/test/CodeGen/AMDGPU/elf.ll
llvm/trunk/test/CodeGen/AMDGPU/fetch-limits.r600.ll
llvm/trunk/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
llvm/trunk/test/CodeGen/AMDGPU/floor.ll
llvm/trunk/test/CodeGen/AMDGPU/fmad.ll
llvm/trunk/test/CodeGen/AMDGPU/fmax.ll
llvm/trunk/test/CodeGen/AMDGPU/fmin.ll
llvm/trunk/test/CodeGen/AMDGPU/inline-asm.ll
llvm/trunk/test/CodeGen/AMDGPU/input-mods.ll
llvm/trunk/test/CodeGen/AMDGPU/jump-address.ll
llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.gather4.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.getlod.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample.o.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.packf16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.tid.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.cos.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.pow.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.sin.ll
llvm/trunk/test/CodeGen/AMDGPU/load-input-fold.ll
llvm/trunk/test/CodeGen/AMDGPU/m0-spill.ll
llvm/trunk/test/CodeGen/AMDGPU/max-literals.ll
llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll
llvm/trunk/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
llvm/trunk/test/CodeGen/AMDGPU/predicate-dp4.ll
llvm/trunk/test/CodeGen/AMDGPU/pv-packing.ll
llvm/trunk/test/CodeGen/AMDGPU/pv.ll
llvm/trunk/test/CodeGen/AMDGPU/r600-encoding.ll
llvm/trunk/test/CodeGen/AMDGPU/r600-export-fix.ll
llvm/trunk/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
llvm/trunk/test/CodeGen/AMDGPU/r600cfg.ll
llvm/trunk/test/CodeGen/AMDGPU/reciprocal.ll
llvm/trunk/test/CodeGen/AMDGPU/ret.ll
llvm/trunk/test/CodeGen/AMDGPU/ret_jump.ll
llvm/trunk/test/CodeGen/AMDGPU/rv7x0_count3.ll
llvm/trunk/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll
llvm/trunk/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
llvm/trunk/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll
llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
llvm/trunk/test/CodeGen/AMDGPU/shared-op-cycle.ll
llvm/trunk/test/CodeGen/AMDGPU/si-literal-folding.ll
llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll
llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll
llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll
llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll
llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
llvm/trunk/test/CodeGen/AMDGPU/swizzle-export.ll
llvm/trunk/test/CodeGen/AMDGPU/tex-clause-antidep.ll
llvm/trunk/test/CodeGen/AMDGPU/texture-input-merge.ll
llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
llvm/trunk/test/CodeGen/AMDGPU/wait.ll
llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
Modified: llvm/trunk/include/llvm/IR/CallingConv.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/CallingConv.h?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/CallingConv.h (original)
+++ llvm/trunk/include/llvm/IR/CallingConv.h Wed Apr 6 14:40:20 2016
@@ -178,6 +178,18 @@ namespace CallingConv {
/// which have an "optimized" convention to preserve registers.
AVR_BUILTIN = 86,
+ /// Calling convention used for Mesa vertex shaders.
+ AMDGPU_VS = 87,
+
+ /// Calling convention used for Mesa geometry shaders.
+ AMDGPU_GS = 88,
+
+ /// Calling convention used for Mesa pixel shaders.
+ AMDGPU_PS = 89,
+
+ /// Calling convention used for Mesa compute shaders.
+ AMDGPU_CS = 90,
+
/// The highest possible calling convention ID. Must be some 2^k - 1.
MaxID = 1023
};
Modified: llvm/trunk/lib/AsmParser/LLLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLLexer.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/AsmParser/LLLexer.cpp (original)
+++ llvm/trunk/lib/AsmParser/LLLexer.cpp Wed Apr 6 14:40:20 2016
@@ -599,6 +599,10 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(hhvmcc);
KEYWORD(hhvm_ccc);
KEYWORD(cxx_fast_tlscc);
+ KEYWORD(amdgpu_vs);
+ KEYWORD(amdgpu_gs);
+ KEYWORD(amdgpu_ps);
+ KEYWORD(amdgpu_cs);
KEYWORD(cc);
KEYWORD(c);
Modified: llvm/trunk/lib/AsmParser/LLParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLParser.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/AsmParser/LLParser.cpp (original)
+++ llvm/trunk/lib/AsmParser/LLParser.cpp Wed Apr 6 14:40:20 2016
@@ -1616,6 +1616,12 @@ bool LLParser::ParseOptionalDLLStorageCl
/// ::= 'hhvmcc'
/// ::= 'hhvm_ccc'
/// ::= 'cxx_fast_tlscc'
+/// ::= 'amdgpu_vs'
+/// ::= 'amdgpu_tcs'
+/// ::= 'amdgpu_tes'
+/// ::= 'amdgpu_gs'
+/// ::= 'amdgpu_ps'
+/// ::= 'amdgpu_cs'
/// ::= 'cc' UINT
///
bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
@@ -1651,6 +1657,10 @@ bool LLParser::ParseOptionalCallingConv(
case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break;
case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break;
case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break;
+ case lltok::kw_amdgpu_vs: CC = CallingConv::AMDGPU_VS; break;
+ case lltok::kw_amdgpu_gs: CC = CallingConv::AMDGPU_GS; break;
+ case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break;
+ case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
case lltok::kw_cc: {
Lex.Lex();
return ParseUInt32(CC);
Modified: llvm/trunk/lib/AsmParser/LLToken.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLToken.h?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/AsmParser/LLToken.h (original)
+++ llvm/trunk/lib/AsmParser/LLToken.h Wed Apr 6 14:40:20 2016
@@ -106,6 +106,10 @@ namespace lltok {
kw_x86_intrcc,
kw_hhvmcc, kw_hhvm_ccc,
kw_cxx_fast_tlscc,
+ kw_amdgpu_vs,
+ kw_amdgpu_gs,
+ kw_amdgpu_ps,
+ kw_amdgpu_cs,
// Attributes:
kw_attributes,
Modified: llvm/trunk/lib/IR/AsmWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AsmWriter.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AsmWriter.cpp (original)
+++ llvm/trunk/lib/IR/AsmWriter.cpp Wed Apr 6 14:40:20 2016
@@ -319,6 +319,10 @@ static void PrintCallingConv(unsigned cc
case CallingConv::X86_INTR: Out << "x86_intrcc"; break;
case CallingConv::HHVM: Out << "hhvmcc"; break;
case CallingConv::HHVM_C: Out << "hhvm_ccc"; break;
+ case CallingConv::AMDGPU_VS: Out << "amdgpu_vs"; break;
+ case CallingConv::AMDGPU_GS: Out << "amdgpu_gs"; break;
+ case CallingConv::AMDGPU_PS: Out << "amdgpu_ps"; break;
+ case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break;
}
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Wed Apr 6 14:40:20 2016
@@ -123,15 +123,6 @@ enum TargetIndex {
} // End namespace llvm
-namespace ShaderType {
- enum Type {
- PIXEL = 0,
- VERTEX = 1,
- GEOMETRY = 2,
- COMPUTE = 3
- };
-}
-
/// OpenCL uses address spaces to differentiate between
/// various memory regions on the hardware. On the CPU
/// all of the address spaces point to the same memory,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Apr 6 14:40:20 2016
@@ -303,21 +303,21 @@ void AMDGPUAsmPrinter::EmitProgramInfoR6
unsigned RsrcReg;
if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
// Evergreen / Northern Islands
- switch (MFI->getShaderType()) {
+ switch (MF.getFunction()->getCallingConv()) {
default: // Fall through
- case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
- case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
- case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
- case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
+ case CallingConv::AMDGPU_CS: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
+ case CallingConv::AMDGPU_GS: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
+ case CallingConv::AMDGPU_PS: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
+ case CallingConv::AMDGPU_VS: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
}
} else {
// R600 / R700
- switch (MFI->getShaderType()) {
+ switch (MF.getFunction()->getCallingConv()) {
default: // Fall through
- case ShaderType::GEOMETRY: // Fall through
- case ShaderType::COMPUTE: // Fall through
- case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
- case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
+ case CallingConv::AMDGPU_GS: // Fall through
+ case CallingConv::AMDGPU_CS: // Fall through
+ case CallingConv::AMDGPU_VS: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
+ case CallingConv::AMDGPU_PS: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
}
}
@@ -327,7 +327,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR6
OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
- if (MFI->getShaderType() == ShaderType::COMPUTE) {
+ if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
OutStreamer->EmitIntValue(alignTo(MFI->LDSSize, 4) >> 2, 4);
}
@@ -546,13 +546,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
S_00B84C_EXCP_EN(0);
}
-static unsigned getRsrcReg(unsigned ShaderType) {
- switch (ShaderType) {
+static unsigned getRsrcReg(CallingConv::ID CallConv) {
+ switch (CallConv) {
default: // Fall through
- case ShaderType::COMPUTE: return R_00B848_COMPUTE_PGM_RSRC1;
- case ShaderType::GEOMETRY: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
- case ShaderType::PIXEL: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
- case ShaderType::VERTEX: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
+ case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1;
+ case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
+ case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
+ case CallingConv::AMDGPU_VS: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
}
}
@@ -560,9 +560,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI
const SIProgramInfo &KernelInfo) {
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
+ unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv());
- if (MFI->getShaderType() == ShaderType::COMPUTE) {
+ if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
@@ -579,13 +579,13 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI
OutStreamer->EmitIntValue(RsrcReg, 4);
OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
- if (STM.isVGPRSpillingEnabled(MFI)) {
+ if (STM.isVGPRSpillingEnabled(*MF.getFunction())) {
OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
}
}
- if (MFI->getShaderType() == ShaderType::PIXEL) {
+ if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) {
OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td Wed Apr 6 14:40:20 2016
@@ -117,14 +117,12 @@ def CC_AMDGPU : CallingConv<[
CCIf<"static_cast<const AMDGPUSubtarget&>"
"(State.getMachineFunction().getSubtarget()).getGeneration() >="
"AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "State.getMachineFunction().getInfo<SIMachineFunctionInfo>()"
- "->getShaderType() == ShaderType::COMPUTE",
+ "!AMDGPU::isShader(State.getCallingConv())",
CCDelegateTo<CC_AMDGPU_Kernel>>,
CCIf<"static_cast<const AMDGPUSubtarget&>"
"(State.getMachineFunction().getSubtarget()).getGeneration() < "
"AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "State.getMachineFunction().getInfo<R600MachineFunctionInfo>()"
- "->getShaderType() == ShaderType::COMPUTE",
+ "!AMDGPU::isShader(State.getCallingConv())",
CCDelegateTo<CC_AMDGPU_Kernel>>,
CCIf<"static_cast<const AMDGPUSubtarget&>"
"(State.getMachineFunction().getSubtarget()).getGeneration() >= "
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp Wed Apr 6 14:40:20 2016
@@ -10,11 +10,8 @@ void AMDGPUMachineFunction::anchor() {}
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
- ShaderType(ShaderType::COMPUTE),
LDSSize(0),
ABIArgOffset(0),
ScratchSize(0),
IsKernel(true) {
-
- ShaderType = AMDGPU::getShaderType(*MF.getFunction());
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h Wed Apr 6 14:40:20 2016
@@ -17,7 +17,6 @@ namespace llvm {
class AMDGPUMachineFunction : public MachineFunctionInfo {
virtual void anchor();
- unsigned ShaderType;
public:
AMDGPUMachineFunction(const MachineFunction &MF);
@@ -30,10 +29,6 @@ public:
/// Start of implicit kernel args
unsigned ABIArgOffset;
- unsigned getShaderType() const {
- return ShaderType;
- }
-
bool isKernel() const {
// FIXME: Assume everything is a kernel until function calls are supported.
return true;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Wed Apr 6 14:40:20 2016
@@ -136,9 +136,8 @@ AMDGPU::IsaVersion AMDGPUSubtarget::getI
return AMDGPU::getIsaVersion(getFeatureBits());
}
-bool AMDGPUSubtarget::isVGPRSpillingEnabled(
- const SIMachineFunctionInfo *MFI) const {
- return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
+bool AMDGPUSubtarget::isVGPRSpillingEnabled(const Function& F) const {
+ return !AMDGPU::isShader(F.getCallingConv()) || EnableVGPRSpilling;
}
void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Wed Apr 6 14:40:20 2016
@@ -305,7 +305,7 @@ public:
bool isAmdHsaOS() const {
return TargetTriple.getOS() == Triple::AMDHSA;
}
- bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
+ bool isVGPRSpillingEnabled(const Function& F) const;
bool isXNACKEnabled() const {
return EnableXNACK;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Wed Apr 6 14:40:20 2016
@@ -265,10 +265,9 @@ static bool isIntrinsicSourceOfDivergenc
static bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
- unsigned ShaderType = AMDGPU::getShaderType(*F);
// Arguments to compute shaders are never a source of divergence.
- if (ShaderType == ShaderType::COMPUTE)
+ if (!AMDGPU::isShader(F->getCallingConv()))
return true;
// For non-compute shaders, SGPR inputs are marked with either inreg or byval.
Modified: llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp Wed Apr 6 14:40:20 2016
@@ -46,9 +46,9 @@ struct CFStack {
unsigned CurrentEntries;
unsigned CurrentSubEntries;
- CFStack(const AMDGPUSubtarget *st, unsigned ShaderType) : ST(st),
+ CFStack(const AMDGPUSubtarget *st, CallingConv::ID cc) : ST(st),
// We need to reserve a stack entry for CALL_FS in vertex shaders.
- MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
+ MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0),
CurrentEntries(0), CurrentSubEntries(0) { }
unsigned getLoopDepth();
@@ -478,14 +478,14 @@ public:
TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- CFStack CFStack(ST, MFI->getShaderType());
+ CFStack CFStack(ST, MF.getFunction()->getCallingConv());
for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
++MB) {
MachineBasicBlock &MBB = *MB;
unsigned CfCount = 0;
std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
std::vector<MachineInstr * > IfThenElseStack;
- if (MFI->getShaderType() == ShaderType::VERTEX) {
+ if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_VS) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
getHWInstrDesc(CF_CALL_FS));
CfCount++;
Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Wed Apr 6 14:40:20 2016
@@ -1759,7 +1759,7 @@ SDValue R600TargetLowering::LowerFormalA
MemVT = MemVT.getVectorElementType();
}
- if (MFI->getShaderType() != ShaderType::COMPUTE) {
+ if (AMDGPU::isShader(CallConv)) {
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
InVals.push_back(Register);
Modified: llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp Wed Apr 6 14:40:20 2016
@@ -204,8 +204,7 @@ bool R600InstrInfo::usesVertexCache(unsi
bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
const MachineFunction *MF = MI->getParent()->getParent();
- const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
- return MFI->getShaderType() != ShaderType::COMPUTE &&
+ return !AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
usesVertexCache(MI->getOpcode());
}
@@ -215,8 +214,7 @@ bool R600InstrInfo::usesTextureCache(uns
bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
const MachineFunction *MF = MI->getParent()->getParent();
- const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
- return (MFI->getShaderType() == ShaderType::COMPUTE &&
+ return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
usesVertexCache(MI->getOpcode())) ||
usesTextureCache(MI->getOpcode());
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Apr 6 14:40:20 2016
@@ -606,7 +606,7 @@ SDValue SITargetLowering::LowerFormalArg
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
- if (Subtarget->isAmdHsaOS() && Info->getShaderType() != ShaderType::COMPUTE) {
+ if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
const Function *Fn = MF.getFunction();
DiagnosticInfoUnsupported NoGraphicsHSA(
*Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
@@ -614,8 +614,6 @@ SDValue SITargetLowering::LowerFormalArg
return SDValue();
}
- // FIXME: We currently assume all calling conventions are kernels.
-
SmallVector<ISD::InputArg, 16> Splits;
BitVector Skipped(Ins.size());
@@ -623,7 +621,7 @@ SDValue SITargetLowering::LowerFormalArg
const ISD::InputArg &Arg = Ins[i];
// First check if it's a PS input addr
- if (Info->getShaderType() == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
+ if (CallConv == CallingConv::AMDGPU_PS && !Arg.Flags.isInReg() &&
!Arg.Flags.isByVal() && PSInputNum <= 15) {
if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) {
@@ -641,7 +639,8 @@ SDValue SITargetLowering::LowerFormalArg
}
// Second split vertices into their elements
- if (Info->getShaderType() != ShaderType::COMPUTE && Arg.VT.isVector()) {
+ if (AMDGPU::isShader(CallConv) &&
+ Arg.VT.isVector()) {
ISD::InputArg NewArg = Arg;
NewArg.Flags.setSplit();
NewArg.VT = Arg.VT.getVectorElementType();
@@ -657,7 +656,7 @@ SDValue SITargetLowering::LowerFormalArg
NewArg.PartOffset += NewArg.VT.getStoreSize();
}
- } else if (Info->getShaderType() != ShaderType::COMPUTE) {
+ } else if (AMDGPU::isShader(CallConv)) {
Splits.push_back(Arg);
}
}
@@ -678,7 +677,7 @@ SDValue SITargetLowering::LowerFormalArg
// - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
// - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
// enabled too.
- if (Info->getShaderType() == ShaderType::PIXEL &&
+ if (CallConv == CallingConv::AMDGPU_PS &&
((Info->getPSInputAddr() & 0x7F) == 0 ||
((Info->getPSInputAddr() & 0xF) == 0 &&
Info->isPSInputAllocated(11)))) {
@@ -688,7 +687,7 @@ SDValue SITargetLowering::LowerFormalArg
Info->PSInputEna |= 1;
}
- if (Info->getShaderType() == ShaderType::COMPUTE) {
+ if (!AMDGPU::isShader(CallConv)) {
getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
Splits);
}
@@ -932,7 +931,7 @@ SDValue SITargetLowering::LowerReturn(SD
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- if (Info->getShaderType() == ShaderType::COMPUTE)
+ if (!AMDGPU::isShader(CallConv))
return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs,
OutVals, DL, DAG);
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Wed Apr 6 14:40:20 2016
@@ -596,7 +596,7 @@ void SIInstrInfo::storeRegToStackSlot(Ma
return;
}
- if (!ST.isVGPRSpillingEnabled(MFI)) {
+ if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
" spill register");
@@ -682,7 +682,7 @@ void SIInstrInfo::loadRegFromStackSlot(M
return;
}
- if (!ST.isVGPRSpillingEnabled(MFI)) {
+ if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
" restore register");
@@ -728,7 +728,7 @@ unsigned SIInstrInfo::calculateLDSSpillA
return TIDReg;
- if (MFI->getShaderType() == ShaderType::COMPUTE &&
+ if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) &&
WorkGroupSize > WavefrontSize) {
unsigned TIDIGXReg
Modified: llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp Wed Apr 6 14:40:20 2016
@@ -169,8 +169,7 @@ void SILowerControlFlow::SkipIfDead(Mach
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getShaderType() !=
- ShaderType::PIXEL ||
+ if (MBB.getParent()->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
!shouldSkip(&MBB, &MBB.getParent()->back()))
return;
@@ -328,11 +327,10 @@ void SILowerControlFlow::Kill(MachineIns
const MachineOperand &Op = MI.getOperand(0);
#ifndef NDEBUG
- const SIMachineFunctionInfo *MFI
- = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ CallingConv::ID CallConv = MBB.getParent()->getFunction()->getCallingConv();
// Kill is only allowed in pixel / geometry shaders.
- assert(MFI->getShaderType() == ShaderType::PIXEL ||
- MFI->getShaderType() == ShaderType::GEOMETRY);
+ assert(CallConv == CallingConv::AMDGPU_PS ||
+ CallConv == CallingConv::AMDGPU_GS);
#endif
// Clear this thread from the exec mask if the operand is negative
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Wed Apr 6 14:40:20 2016
@@ -80,7 +80,7 @@ SIMachineFunctionInfo::SIMachineFunction
const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- if (getShaderType() == ShaderType::COMPUTE)
+ if (!AMDGPU::isShader(F->getCallingConv()))
KernargSegmentPtr = true;
if (F->hasFnAttribute("amdgpu-work-group-id-y"))
@@ -100,7 +100,7 @@ SIMachineFunctionInfo::SIMachineFunction
if (WorkItemIDZ)
WorkItemIDY = true;
- bool MaySpill = ST.isVGPRSpillingEnabled(this);
+ bool MaySpill = ST.isVGPRSpillingEnabled(*F);
bool HasStackObjects = FrameInfo->hasStackObjects();
if (HasStackObjects || MaySpill)
@@ -202,5 +202,7 @@ unsigned SIMachineFunctionInfo::getMaxim
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
// FIXME: We should get this information from kernel attributes if it
// is available.
- return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
+ if (AMDGPU::isCompute(MF.getFunction()->getCallingConv()))
+ return 256;
+ return ST.getWavefrontSize();
}
Modified: llvm/trunk/lib/Target/AMDGPU/SITypeRewriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SITypeRewriter.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SITypeRewriter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SITypeRewriter.cpp Wed Apr 6 14:40:20 2016
@@ -62,7 +62,7 @@ bool SITypeRewriter::doInitialization(Mo
}
bool SITypeRewriter::runOnFunction(Function &F) {
- if (AMDGPU::getShaderType(F) == ShaderType::COMPUTE)
+ if (!AMDGPU::isShader(F.getCallingConv()))
return false;
visit(F);
Modified: llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp Wed Apr 6 14:40:20 2016
@@ -425,9 +425,7 @@ void SIWholeQuadMode::processBlock(Machi
}
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
- if (MFI->getShaderType() != ShaderType::PIXEL)
+ if (MF.getFunction()->getCallingConv() != CallingConv::AMDGPU_PS)
return false;
Instructions.clear();
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Wed Apr 6 14:40:20 2016
@@ -124,14 +124,26 @@ static unsigned getIntegerAttribute(cons
return Result;
}
-unsigned getShaderType(const Function &F) {
- return getIntegerAttribute(F, "ShaderType", ShaderType::COMPUTE);
-}
-
unsigned getInitialPSInputAddr(const Function &F) {
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
+bool isShader(CallingConv::ID cc) {
+ switch(cc) {
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool isCompute(CallingConv::ID cc) {
+ return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
+}
+
bool isSI(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Wed Apr 6 14:40:20 2016
@@ -11,6 +11,7 @@
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#include "AMDKernelCodeT.h"
+#include "llvm/IR/CallingConv.h"
namespace llvm {
@@ -44,9 +45,10 @@ bool isGroupSegment(const GlobalValue *G
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);
-unsigned getShaderType(const Function &F);
unsigned getInitialPSInputAddr(const Function &F);
+bool isShader(CallingConv::ID cc);
+bool isCompute(CallingConv::ID cc);
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
Modified: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll (original)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll Wed Apr 6 14:40:20 2016
@@ -9,7 +9,7 @@
; CHECK: DIVERGENT: float %arg5
; CHECK: DIVERGENT: i32 %arg6
-define void @main([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
+define cc 87 void @main([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
ret void
}
Added: llvm/trunk/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll?rev=265589&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll Wed Apr 6 14:40:20 2016
@@ -0,0 +1,21 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+
+; GCN-LABEL: {{^}}shader_cc:
+; GCN: v_add_i32_e32 v0, vcc, s8, v0
+define amdgpu_cs float @shader_cc(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
+ %vi = bitcast float %v to i32
+ %x = add i32 %vi, %w
+ %xf = bitcast i32 %x to float
+ ret float %xf
+}
+
+; GCN-LABEL: {{^}}kernel_cc:
+; GCN: s_endpgm
+define float @kernel_cc(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
+ %vi = bitcast float %v to i32
+ %x = add i32 %vi, %w
+ %xf = bitcast i32 %x to float
+ ret float %xf
+}
Modified: llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
; This test ensures that R600 backend can handle ifcvt properly
; and do not generate ALU clauses with more than 128 instructions.
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #1 {
+define amdgpu_ps void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) {
main_body:
%0 = extractelement <4 x float> %reg0, i32 0
%1 = extractelement <4 x float> %reg0, i32 1
@@ -1297,7 +1297,6 @@ declare float @llvm.AMDGPU.clamp.f32(flo
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { alwaysinline nounwind readnone }
-attributes #1 = { "ShaderType"="0" }
attributes #2 = { readnone }
attributes #3 = { nounwind readnone }
attributes #4 = { readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/bitcast.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/bitcast.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/bitcast.ll Wed Apr 6 14:40:20 2016
@@ -7,7 +7,7 @@ declare void @llvm.SI.export(i32, i32, i
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
; SI: s_endpgm
-define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
+define amdgpu_ps void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
entry:
%1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
%2 = bitcast <32 x i8> %1 to <8 x i32>
@@ -75,5 +75,3 @@ define void @bitcast_f64_to_v2i32(<2 x i
store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8
ret void
}
-
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/call_fs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call_fs.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call_fs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call_fs.ll Wed Apr 6 14:40:20 2016
@@ -10,8 +10,6 @@
; R600:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
-define void @call_fs() #0 {
+define amdgpu_vs void @call_fs() {
ret void
}
-
-attributes #0 = { "ShaderType"="1" } ; Vertex Shader
Modified: llvm/trunk/test/CodeGen/AMDGPU/cayman-loop-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cayman-loop-bug.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cayman-loop-bug.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cayman-loop-bug.ll Wed Apr 6 14:40:20 2016
@@ -8,7 +8,7 @@
; CHECK-NOT: ALU_PUSH_BEFORE
; CHECK: END_LOOP
; CHECK: END_LOOP
-define void @main (<4 x float> inreg %reg0) #0 {
+define amdgpu_ps void @main (<4 x float> inreg %reg0) {
entry:
br label %outer_loop
outer_loop:
@@ -28,5 +28,3 @@ inner_loop_body:
exit:
ret void
}
-
-attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
Modified: llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll Wed Apr 6 14:40:20 2016
@@ -4,7 +4,7 @@
; GCN-LABEL: {{^}}main:
; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
-define void @main() #0 {
+define amdgpu_ps void @main() #0 {
bb:
%tmp = fptosi float undef to i32
%tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -25,5 +25,5 @@ declare <4 x float> @llvm.SI.image.load.
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
+attributes #0 = { "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/complex-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/complex-folding.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/complex-folding.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/complex-folding.ll Wed Apr 6 14:40:20 2016
@@ -2,7 +2,7 @@
; CHECK: {{^}}main:
; CHECK-NOT: MOV
-define void @main(<4 x float> inreg %reg0) #0 {
+define amdgpu_ps void @main(<4 x float> inreg %reg0) {
entry:
%0 = extractelement <4 x float> %reg0, i32 0
%1 = call float @fabs(float %0)
@@ -15,5 +15,3 @@ entry:
declare float @fabs(float ) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
Modified: llvm/trunk/test/CodeGen/AMDGPU/elf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/elf.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/elf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/elf.ll Wed Apr 6 14:40:20 2016
@@ -24,7 +24,7 @@
; TONGA-NEXT: .long 576
; CONFIG: .p2align 8
; CONFIG: test:
-define void @test(i32 %p) #0 {
+define amdgpu_ps void @test(i32 %p) {
%i = add i32 %p, 2
%r = bitcast i32 %i to float
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
@@ -32,5 +32,3 @@ define void @test(i32 %p) #0 {
}
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" } ; Pixel Shader
Modified: llvm/trunk/test/CodeGen/AMDGPU/fetch-limits.r600.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fetch-limits.r600.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fetch-limits.r600.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fetch-limits.r600.ll Wed Apr 6 14:40:20 2016
@@ -7,7 +7,7 @@
; CHECK: Fetch clause
; CHECK: Fetch clause
-define void @fetch_limits_r600() #0 {
+define amdgpu_ps void @fetch_limits_r600() {
entry:
%0 = load <4 x float>, <4 x float> addrspace(8)* null
%1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
@@ -42,7 +42,5 @@ entry:
ret void
}
-attributes #0 = { "ShaderType"="0" } ; Pixel Shader
-
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
Modified: llvm/trunk/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fetch-limits.r700%2B.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fetch-limits.r700+.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fetch-limits.r700+.ll Wed Apr 6 14:40:20 2016
@@ -16,7 +16,7 @@
; CHECK: Fetch clause
; CHECK: Fetch clause
-define void @fetch_limits_r700() #0 {
+define amdgpu_ps void @fetch_limits_r700() {
entry:
%0 = load <4 x float>, <4 x float> addrspace(8)* null
%1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
@@ -75,7 +75,5 @@ entry:
ret void
}
-attributes #0 = { "ShaderType"="0" } ; Pixel Shader
-
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
Modified: llvm/trunk/test/CodeGen/AMDGPU/floor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/floor.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/floor.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/floor.ll Wed Apr 6 14:40:20 2016
@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s
; CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test(<4 x float> inreg %reg0) #0 {
+define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = call float @floor(float %r0)
%vec = insertelement <4 x float> undef, float %r1, i32 0
@@ -12,4 +12,3 @@ define void @test(<4 x float> inreg %reg
declare float @floor(float) readonly
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmad.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmad.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmad.ll Wed Apr 6 14:40:20 2016
@@ -2,7 +2,7 @@
;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test(<4 x float> inreg %reg0) #0 {
+define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = extractelement <4 x float> %reg0, i32 1
%r2 = extractelement <4 x float> %reg0, i32 2
@@ -15,5 +15,3 @@ define void @test(<4 x float> inreg %reg
declare float @fabs(float ) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmax.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmax.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmax.ll Wed Apr 6 14:40:20 2016
@@ -2,7 +2,7 @@
;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test(<4 x float> inreg %reg0) #0 {
+define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = extractelement <4 x float> %reg0, i32 1
%r2 = fcmp oge float %r0, %r1
@@ -13,5 +13,3 @@ define void @test(<4 x float> inreg %reg
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmin.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmin.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmin.ll Wed Apr 6 14:40:20 2016
@@ -2,7 +2,7 @@
;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test(<4 x float> inreg %reg0) #0 {
+define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = extractelement <4 x float> %reg0, i32 1
%r2 = fcmp uge float %r0, %r1
@@ -13,5 +13,3 @@ define void @test(<4 x float> inreg %reg
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
Modified: llvm/trunk/test/CodeGen/AMDGPU/inline-asm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/inline-asm.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inline-asm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/inline-asm.ll Wed Apr 6 14:40:20 2016
@@ -14,14 +14,12 @@ entry:
; CHECK: {{^}}inline_asm_shader:
; CHECK: s_endpgm
; CHECK: s_endpgm
-define void @inline_asm_shader() #0 {
+define amdgpu_ps void @inline_asm_shader() {
entry:
call void asm sideeffect "s_endpgm", ""()
ret void
}
-attributes #0 = { "ShaderType"="0" }
-
; CHECK: {{^}}branch_on_asm:
; Make sure inline assembly is treted as divergent.
Modified: llvm/trunk/test/CodeGen/AMDGPU/input-mods.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/input-mods.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/input-mods.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/input-mods.ll Wed Apr 6 14:40:20 2016
@@ -9,7 +9,7 @@
;CM: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X|
;CM: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X|
-define void @test(<4 x float> inreg %reg0) #0 {
+define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = call float @llvm.fabs.f32(float %r0)
%r2 = fsub float -0.000000e+00, %r1
@@ -22,5 +22,3 @@ define void @test(<4 x float> inreg %reg
declare float @llvm.exp2.f32(float) readnone
declare float @llvm.fabs.f32(float) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/jump-address.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/jump-address.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/jump-address.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/jump-address.ll Wed Apr 6 14:40:20 2016
@@ -4,7 +4,7 @@
; CHECK: EXPORT
; CHECK-NOT: EXPORT
-define void @main() #0 {
+define amdgpu_ps void @main() {
main_body:
%0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%1 = extractelement <4 x float> %0, i32 0
@@ -48,5 +48,3 @@ IF13:
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll Wed Apr 6 14:40:20 2016
@@ -12,7 +12,7 @@
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
; ALL: ; ScratchSize: 32772
-define void @large_alloca_pixel_shader(i32 %x, i32 %y) #1 {
+define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
%large = alloca [8192 x i32], align 4
%gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
store volatile i32 %x, i32* %gep
@@ -33,7 +33,7 @@ define void @large_alloca_pixel_shader(i
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
; ALL: ; ScratchSize: 32772
-define void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #1 {
+define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #0 {
%large = alloca [8192 x i32], align 4
%gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
store volatile i32 %x, i32* %gep
@@ -44,4 +44,3 @@ define void @large_alloca_pixel_shader_i
}
attributes #0 = { nounwind }
-attributes #1 = { nounwind "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll Wed Apr 6 14:40:20 2016
@@ -5,7 +5,7 @@
; CHECK: CUBE T{{[0-9]}}.Y
; CHECK: CUBE T{{[0-9]}}.Z
; CHECK: CUBE * T{{[0-9]}}.W
-define void @cube() #0 {
+define amdgpu_ps void @cube() {
main_body:
%0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%1 = extractelement <4 x float> %0, i32 3
@@ -43,16 +43,15 @@ main_body:
}
; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #1
+declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
; Function Attrs: readnone
-declare float @fabs(float) #1
+declare float @fabs(float) #0
; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1
+declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { readnone }
+attributes #0 = { readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll Wed Apr 6 14:40:20 2016
@@ -5,7 +5,7 @@
; SI-NOT: v_cmpx_le_f32
; SI: s_mov_b64 exec, 0
-define void @kill_gs_const() #0 {
+define amdgpu_gs void @kill_gs_const() {
main_body:
%0 = icmp ule i32 0, 3
%1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
@@ -21,7 +21,7 @@ main_body:
; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
-define void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #1 {
+define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
entry:
%tmp0 = fcmp olt float %13, 0.0
call void @llvm.AMDGPU.kill(float %14)
@@ -33,7 +33,4 @@ entry:
declare void @llvm.AMDGPU.kill(float)
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="2" }
-attributes #1 = { "ShaderType"="0" }
-
!0 = !{!"const", null, i32 1}
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll Wed Apr 6 14:40:20 2016
@@ -10,7 +10,7 @@
;GCN: v_interp_p1_f32
;GCN: v_interp_p2_f32
-define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
+define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) {
main_body:
%5 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
%6 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %4)
@@ -25,7 +25,7 @@ main_body:
; 16BANK-LABEL: {{^}}v_interp_p1_bank16_bug:
; 16BANK-NOT: v_interp_p1_f32 [[DST:v[0-9]+]], [[DST]]
-define void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
+define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
main_body:
%22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
@@ -42,19 +42,18 @@ main_body:
}
; Function Attrs: readnone
-declare float @fabs(float) #2
+declare float @fabs(float) #1
; Function Attrs: nounwind readnone
-declare i32 @llvm.SI.packf16(float, float) #1
+declare i32 @llvm.SI.packf16(float, float) #0
; Function Attrs: nounwind readnone
-declare float @llvm.SI.fs.constant(i32, i32, i32) #1
+declare float @llvm.SI.fs.constant(i32, i32, i32) #0
; Function Attrs: nounwind readnone
-declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
+declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { readnone }
+attributes #0 = { nounwind readnone }
+attributes #1 = { readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.gather4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.gather4.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.gather4.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.gather4.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
;CHECK-LABEL: {{^}}gather4_v2:
;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_v2() #0 {
+define amdgpu_ps void @gather4_v2() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -16,7 +16,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4:
;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4() #0 {
+define amdgpu_ps void @gather4() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -29,7 +29,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_cl:
;CHECK: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_cl() #0 {
+define amdgpu_ps void @gather4_cl() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -42,7 +42,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_l:
;CHECK: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_l() #0 {
+define amdgpu_ps void @gather4_l() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -55,7 +55,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_b:
;CHECK: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b() #0 {
+define amdgpu_ps void @gather4_b() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -68,7 +68,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_b_cl:
;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b_cl() #0 {
+define amdgpu_ps void @gather4_b_cl() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -81,7 +81,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_b_cl_v8:
;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b_cl_v8() #0 {
+define amdgpu_ps void @gather4_b_cl_v8() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -94,7 +94,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_lz_v2:
;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_lz_v2() #0 {
+define amdgpu_ps void @gather4_lz_v2() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -107,7 +107,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_lz:
;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_lz() #0 {
+define amdgpu_ps void @gather4_lz() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -122,7 +122,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_o:
;CHECK: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_o() #0 {
+define amdgpu_ps void @gather4_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -135,7 +135,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_cl_o:
;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_cl_o() #0 {
+define amdgpu_ps void @gather4_cl_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -148,7 +148,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_cl_o_v8:
;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_cl_o_v8() #0 {
+define amdgpu_ps void @gather4_cl_o_v8() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -161,7 +161,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_l_o:
;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_l_o() #0 {
+define amdgpu_ps void @gather4_l_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -174,7 +174,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_l_o_v8:
;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_l_o_v8() #0 {
+define amdgpu_ps void @gather4_l_o_v8() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -187,7 +187,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_b_o:
;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b_o() #0 {
+define amdgpu_ps void @gather4_b_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -200,7 +200,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_b_o_v8:
;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b_o_v8() #0 {
+define amdgpu_ps void @gather4_b_o_v8() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -213,7 +213,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_b_cl_o:
;CHECK: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b_cl_o() #0 {
+define amdgpu_ps void @gather4_b_cl_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -226,7 +226,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_lz_o:
;CHECK: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_lz_o() #0 {
+define amdgpu_ps void @gather4_lz_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -241,7 +241,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c:
;CHECK: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c() #0 {
+define amdgpu_ps void @gather4_c() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -254,7 +254,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_cl:
;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_cl() #0 {
+define amdgpu_ps void @gather4_c_cl() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -267,7 +267,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_cl_v8:
;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_cl_v8() #0 {
+define amdgpu_ps void @gather4_c_cl_v8() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -280,7 +280,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_l:
;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_l() #0 {
+define amdgpu_ps void @gather4_c_l() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -293,7 +293,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_l_v8:
;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_l_v8() #0 {
+define amdgpu_ps void @gather4_c_l_v8() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -306,7 +306,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_b:
;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_b() #0 {
+define amdgpu_ps void @gather4_c_b() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -319,7 +319,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_b_v8:
;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_b_v8() #0 {
+define amdgpu_ps void @gather4_c_b_v8() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -332,7 +332,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_b_cl:
;CHECK: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_b_cl() #0 {
+define amdgpu_ps void @gather4_c_b_cl() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -345,7 +345,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_lz:
;CHECK: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_lz() #0 {
+define amdgpu_ps void @gather4_c_lz() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -360,7 +360,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_o:
;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_o() #0 {
+define amdgpu_ps void @gather4_c_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -373,7 +373,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_o_v8:
;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_o_v8() #0 {
+define amdgpu_ps void @gather4_c_o_v8() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -386,7 +386,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_cl_o:
;CHECK: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_cl_o() #0 {
+define amdgpu_ps void @gather4_c_cl_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -399,7 +399,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_l_o:
;CHECK: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_l_o() #0 {
+define amdgpu_ps void @gather4_c_l_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -412,7 +412,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_b_o:
;CHECK: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_b_o() #0 {
+define amdgpu_ps void @gather4_c_b_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -425,7 +425,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_b_cl_o:
;CHECK: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_b_cl_o() #0 {
+define amdgpu_ps void @gather4_c_b_cl_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -438,7 +438,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_lz_o:
;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_lz_o() #0 {
+define amdgpu_ps void @gather4_c_lz_o() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -451,7 +451,7 @@ main_body:
;CHECK-LABEL: {{^}}gather4_c_lz_o_v8:
;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_lz_o_v8() #0 {
+define amdgpu_ps void @gather4_c_lz_o_v8() {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -464,46 +464,45 @@ main_body:
-declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+
+declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+
+declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+
+declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
+attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.getlod.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.getlod.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.getlod.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.getlod.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
;CHECK-LABEL: {{^}}getlod:
;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da
-define void @getlod() #0 {
+define amdgpu_ps void @getlod() {
main_body:
%r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -14,7 +14,7 @@ main_body:
;CHECK-LABEL: {{^}}getlod_v2:
;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da
-define void @getlod_v2() #0 {
+define amdgpu_ps void @getlod_v2() {
main_body:
%r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -25,7 +25,7 @@ main_body:
;CHECK-LABEL: {{^}}getlod_v4:
;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da
-define void @getlod_v4() #0 {
+define amdgpu_ps void @getlod_v4() {
main_body:
%r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -35,11 +35,10 @@ main_body:
}
-declare <4 x float> @llvm.SI.getlod.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.getlod.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
+attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
;CHECK-LABEL: {{^}}image_load:
;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @image_load() #0 {
+define amdgpu_ps void @image_load() {
main_body:
%r = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -16,7 +16,7 @@ main_body:
;CHECK-LABEL: {{^}}image_load_mip:
;CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @image_load_mip() #0 {
+define amdgpu_ps void @image_load_mip() {
main_body:
%r = call <4 x float> @llvm.SI.image.load.mip.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -29,7 +29,7 @@ main_body:
;CHECK-LABEL: {{^}}getresinfo:
;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @getresinfo() #0 {
+define amdgpu_ps void @getresinfo() {
main_body:
%r = call <4 x float> @llvm.SI.getresinfo.i32(i32 undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -40,11 +40,10 @@ main_body:
ret void
}
-declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.getresinfo.i32(i32, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.getresinfo.i32(i32, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
+attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
; CHECK-LABEL: {{^}}v1:
; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xd
-define void @v1(i32 %a1) #0 {
+define amdgpu_ps void @v1(i32 %a1) {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
%1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -16,7 +16,7 @@ entry:
; CHECK-LABEL: {{^}}v2:
; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xb
-define void @v2(i32 %a1) #0 {
+define amdgpu_ps void @v2(i32 %a1) {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
%1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -29,7 +29,7 @@ entry:
; CHECK-LABEL: {{^}}v3:
; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xe
-define void @v3(i32 %a1) #0 {
+define amdgpu_ps void @v3(i32 %a1) {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
%1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -42,7 +42,7 @@ entry:
; CHECK-LABEL: {{^}}v4:
; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x7
-define void @v4(i32 %a1) #0 {
+define amdgpu_ps void @v4(i32 %a1) {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
%1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -55,7 +55,7 @@ entry:
; CHECK-LABEL: {{^}}v5:
; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xa
-define void @v5(i32 %a1) #0 {
+define amdgpu_ps void @v5(i32 %a1) {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
%1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -67,7 +67,7 @@ entry:
; CHECK-LABEL: {{^}}v6:
; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x6
-define void @v6(i32 %a1) #0 {
+define amdgpu_ps void @v6(i32 %a1) {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
%1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -79,7 +79,7 @@ entry:
; CHECK-LABEL: {{^}}v7:
; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x9
-define void @v7(i32 %a1) #0 {
+define amdgpu_ps void @v7(i32 %a1) {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
%1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -92,5 +92,3 @@ entry:
declare <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) readnone
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample.ll Wed Apr 6 14:40:20 2016
@@ -4,7 +4,7 @@
;CHECK-LABEL: {{^}}sample:
;CHECK: s_wqm
;CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample() #0 {
+define amdgpu_ps void @sample() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -18,7 +18,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_cl:
;CHECK: s_wqm
;CHECK: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_cl() #0 {
+define amdgpu_ps void @sample_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -32,7 +32,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_d:
;CHECK-NOT: s_wqm
;CHECK: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_d() #0 {
+define amdgpu_ps void @sample_d() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -46,7 +46,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_d_cl:
;CHECK-NOT: s_wqm
;CHECK: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_d_cl() #0 {
+define amdgpu_ps void @sample_d_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -60,7 +60,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_l:
;CHECK-NOT: s_wqm
;CHECK: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_l() #0 {
+define amdgpu_ps void @sample_l() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -74,7 +74,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_b:
;CHECK: s_wqm
;CHECK: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_b() #0 {
+define amdgpu_ps void @sample_b() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -88,7 +88,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_b_cl:
;CHECK: s_wqm
;CHECK: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_b_cl() #0 {
+define amdgpu_ps void @sample_b_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -102,7 +102,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_lz:
;CHECK-NOT: s_wqm
;CHECK: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_lz() #0 {
+define amdgpu_ps void @sample_lz() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -116,7 +116,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_cd:
;CHECK-NOT: s_wqm
;CHECK: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_cd() #0 {
+define amdgpu_ps void @sample_cd() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -130,7 +130,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_cd_cl:
;CHECK-NOT: s_wqm
;CHECK: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_cd_cl() #0 {
+define amdgpu_ps void @sample_cd_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -144,7 +144,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c:
;CHECK: s_wqm
;CHECK: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c() #0 {
+define amdgpu_ps void @sample_c() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -158,7 +158,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_cl:
;CHECK: s_wqm
;CHECK: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_cl() #0 {
+define amdgpu_ps void @sample_c_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -172,7 +172,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_d:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_d() #0 {
+define amdgpu_ps void @sample_c_d() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -186,7 +186,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_d_cl:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_d_cl() #0 {
+define amdgpu_ps void @sample_c_d_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -200,7 +200,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_l:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_l() #0 {
+define amdgpu_ps void @sample_c_l() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -214,7 +214,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_b:
;CHECK: s_wqm
;CHECK: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_b() #0 {
+define amdgpu_ps void @sample_c_b() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -228,7 +228,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_b_cl:
;CHECK: s_wqm
;CHECK: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_b_cl() #0 {
+define amdgpu_ps void @sample_c_b_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -242,7 +242,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_lz:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_lz() #0 {
+define amdgpu_ps void @sample_c_lz() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -256,7 +256,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_cd:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_cd() #0 {
+define amdgpu_ps void @sample_c_cd() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -270,7 +270,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_cd_cl:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_cd_cl() #0 {
+define amdgpu_ps void @sample_c_cd_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -282,29 +282,28 @@ main_body:
}
-declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+
+declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
+attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample.o.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample.o.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample.o.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample.o.ll Wed Apr 6 14:40:20 2016
@@ -4,7 +4,7 @@
;CHECK-LABEL: {{^}}sample:
;CHECK: s_wqm
;CHECK: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample() #0 {
+define amdgpu_ps void @sample() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -18,7 +18,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_cl:
;CHECK: s_wqm
;CHECK: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_cl() #0 {
+define amdgpu_ps void @sample_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -32,7 +32,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_d:
;CHECK-NOT: s_wqm
;CHECK: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_d() #0 {
+define amdgpu_ps void @sample_d() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -46,7 +46,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_d_cl:
;CHECK-NOT: s_wqm
;CHECK: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_d_cl() #0 {
+define amdgpu_ps void @sample_d_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -60,7 +60,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_l:
;CHECK-NOT: s_wqm
;CHECK: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_l() #0 {
+define amdgpu_ps void @sample_l() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -74,7 +74,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_b:
;CHECK: s_wqm
;CHECK: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_b() #0 {
+define amdgpu_ps void @sample_b() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -88,7 +88,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_b_cl:
;CHECK: s_wqm
;CHECK: image_sample_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_b_cl() #0 {
+define amdgpu_ps void @sample_b_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -102,7 +102,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_lz:
;CHECK-NOT: s_wqm
;CHECK: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_lz() #0 {
+define amdgpu_ps void @sample_lz() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -116,7 +116,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_cd:
;CHECK-NOT: s_wqm
;CHECK: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_cd() #0 {
+define amdgpu_ps void @sample_cd() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -130,7 +130,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_cd_cl:
;CHECK-NOT: s_wqm
;CHECK: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_cd_cl() #0 {
+define amdgpu_ps void @sample_cd_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -144,7 +144,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c:
;CHECK: s_wqm
;CHECK: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c() #0 {
+define amdgpu_ps void @sample_c() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -158,7 +158,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_cl:
;CHECK: s_wqm
;CHECK: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_cl() #0 {
+define amdgpu_ps void @sample_c_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -172,7 +172,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_d:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_d() #0 {
+define amdgpu_ps void @sample_c_d() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -186,7 +186,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_d_cl:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_d_cl() #0 {
+define amdgpu_ps void @sample_c_d_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -200,7 +200,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_l:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_l() #0 {
+define amdgpu_ps void @sample_c_l() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -214,7 +214,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_b:
;CHECK: s_wqm
;CHECK: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_b() #0 {
+define amdgpu_ps void @sample_c_b() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -228,7 +228,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_b_cl:
;CHECK: s_wqm
;CHECK: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_b_cl() #0 {
+define amdgpu_ps void @sample_c_b_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -242,7 +242,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_lz:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_lz() #0 {
+define amdgpu_ps void @sample_c_lz() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -256,7 +256,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_cd:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_cd() #0 {
+define amdgpu_ps void @sample_c_cd() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -270,7 +270,7 @@ main_body:
;CHECK-LABEL: {{^}}sample_c_cd_cl:
;CHECK-NOT: s_wqm
;CHECK: image_sample_c_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define void @sample_c_cd_cl() #0 {
+define amdgpu_ps void @sample_c_cd_cl() {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%r0 = extractelement <4 x float> %r, i32 0
@@ -282,29 +282,28 @@ main_body:
}
-declare <4 x float> @llvm.SI.image.sample.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare <4 x float> @llvm.SI.image.sample.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.image.sample.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+
+declare <4 x float> @llvm.SI.image.sample.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.SI.image.sample.c.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
+attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll Wed Apr 6 14:40:20 2016
@@ -14,7 +14,7 @@
; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding
; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc
-define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) #0 {
+define amdgpu_vs void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
main_body:
%tmp = getelementptr [2 x <16 x i8>], [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1
%tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -40,14 +40,13 @@ main_body:
}
; Function Attrs: nounwind readonly
-declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #0
; Function Attrs: nounwind readonly
-declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #1
+declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="1" }
-attributes #1 = { nounwind readonly }
+attributes #0 = { nounwind readonly }
!0 = !{!"const", null, i32 1}
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.packf16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.packf16.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.packf16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.packf16.ll Wed Apr 6 14:40:20 2016
@@ -6,7 +6,7 @@
; GCN: v_cvt_pkrtz_f16_f32
; GCN-NOT: v_cvt_pkrtz_f16_f32
-define void @main(float %src) #0 {
+define amdgpu_ps void @main(float %src) {
main_body:
%p1 = call i32 @llvm.SI.packf16(float undef, float %src)
%p2 = call i32 @llvm.SI.packf16(float %src, float undef)
@@ -21,9 +21,8 @@ main_body:
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.SI.packf16(float, float) #1
+declare i32 @llvm.SI.packf16(float, float) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
+attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll Wed Apr 6 14:40:20 2016
@@ -7,7 +7,7 @@
; BOTH-NEXT: s_sendmsg Gs_done(nop)
; BOTH-NEXT: s_endpgm
-define void @main(i32 inreg %a) #0 {
+define amdgpu_gs void @main(i32 inreg %a) #0 {
main_body:
call void @llvm.SI.sendmsg(i32 3, i32 %a)
ret void
@@ -16,5 +16,5 @@ main_body:
; Function Attrs: nounwind
declare void @llvm.SI.sendmsg(i32, i32) #1
-attributes #0 = { "ShaderType"="2" "unsafe-fp-math"="true" }
+attributes #0 = { "unsafe-fp-math"="true" }
attributes #1 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
;CHECK-LABEL: {{^}}test1:
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
-define void @test1(i32 %a1, i32 %vaddr) #0 {
+define amdgpu_vs void @test1(i32 %a1, i32 %vaddr) {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
@@ -13,7 +13,7 @@ define void @test1(i32 %a1, i32 %vaddr)
;CHECK-LABEL: {{^}}test2:
;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
-define void @test2(i32 %a1, i32 %vaddr) #0 {
+define amdgpu_vs void @test2(i32 %a1, i32 %vaddr) {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1,
@@ -23,7 +23,7 @@ define void @test2(i32 %a1, i32 %vaddr)
;CHECK-LABEL: {{^}}test3:
;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
-define void @test3(i32 %a1, i32 %vaddr) #0 {
+define amdgpu_vs void @test3(i32 %a1, i32 %vaddr) {
%vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1,
@@ -33,7 +33,7 @@ define void @test3(i32 %a1, i32 %vaddr)
;CHECK-LABEL: {{^}}test4:
;CHECK: tbuffer_store_format_x {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
-define void @test4(i32 %vdata, i32 %vaddr) #0 {
+define amdgpu_vs void @test4(i32 %vdata, i32 %vaddr) {
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
i32 1, i32 0)
@@ -43,5 +43,3 @@ define void @test4(i32 %vdata, i32 %vadd
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.tid.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.tid.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.tid.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.tid.ll Wed Apr 6 14:40:20 2016
@@ -5,7 +5,7 @@
;SI: v_mbcnt_hi_u32_b32_e32
;VI: v_mbcnt_hi_u32_b32_e64
-define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
+define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) {
main_body:
%4 = call i32 @llvm.SI.tid()
%5 = bitcast i32 %4 to float
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll Wed Apr 6 14:40:20 2016
@@ -16,7 +16,7 @@
;CHECK: buffer_atomic_swap v0, s[0:3], [[SOFS]] offset:1 glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_swap v0, s[0:3], 0{{$}}
-define float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) #0 {
+define amdgpu_ps float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) {
main_body:
%o1 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%o2 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %o1, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
@@ -48,7 +48,7 @@ main_body:
;CHECK: buffer_atomic_or v0, v1, s[0:3], 0 idxen glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_xor v0, v1, s[0:3], 0 idxen glc
-define float @test2(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) #0 {
+define amdgpu_ps float @test2(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
main_body:
%t1 = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
%t2 = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 %t1, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
@@ -80,7 +80,7 @@ main_body:
;CHECK-DAG: s_waitcnt vmcnt(0)
;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[SOFS]] offset:1 glc
-define float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) #0 {
+define amdgpu_ps float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) {
main_body:
%o1 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
%o2 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %o1, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
@@ -100,17 +100,16 @@ main_body:
ret float %out
}
-declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.and(i32, <4 x i32>, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.or(i32, <4 x i32>, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #1
+declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.and(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.or(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind }
+attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll Wed Apr 6 14:40:20 2016
@@ -6,7 +6,7 @@
;CHECK: buffer_load_format_xyzw v[4:7], s[0:3], 0 glc
;CHECK: buffer_load_format_xyzw v[8:11], s[0:3], 0 slc
;CHECK: s_waitcnt
-define {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) #0 {
+define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
main_body:
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
%data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
@@ -20,7 +20,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_load_immoffs:
;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0 offset:42
;CHECK: s_waitcnt
-define <4 x float> @buffer_load_immoffs(<4 x i32> inreg) #0 {
+define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
main_body:
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
ret <4 x float> %data
@@ -33,7 +33,7 @@ main_body:
;CHECK: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff
;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS2]] offset:1
;CHECK: s_waitcnt
-define <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) #0 {
+define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
main_body:
%d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4156, i1 0, i1 0)
%d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 36860, i1 0, i1 0)
@@ -49,7 +49,7 @@ main_body:
;CHECK-NOT: s_mov
;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:81
;CHECK: s_waitcnt
-define <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) #0 {
+define amdgpu_ps <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) {
main_body:
%d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4160, i1 0, i1 0)
%d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4176, i1 0, i1 0)
@@ -60,7 +60,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_load_idx:
;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
;CHECK: s_waitcnt
-define <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) #0 {
+define amdgpu_ps <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) {
main_body:
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 0, i1 0, i1 0)
ret <4 x float> %data
@@ -69,7 +69,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_load_ofs:
;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen
;CHECK: s_waitcnt
-define <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) #0 {
+define amdgpu_ps <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) {
main_body:
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %1, i1 0, i1 0)
ret <4 x float> %data
@@ -78,7 +78,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_load_ofs_imm:
;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen offset:58
;CHECK: s_waitcnt
-define <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) #0 {
+define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) {
main_body:
%ofs = add i32 %1, 58
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0)
@@ -88,7 +88,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_load_both:
;CHECK: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
;CHECK: s_waitcnt
-define <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) #0 {
+define amdgpu_ps <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) {
main_body:
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 %2, i1 0, i1 0)
ret <4 x float> %data
@@ -98,7 +98,7 @@ main_body:
;CHECK: v_mov_b32_e32 v2, v0
;CHECK: buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
;CHECK: s_waitcnt
-define <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) #0 {
+define amdgpu_ps <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) {
main_body:
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %2, i32 %1, i1 0, i1 0)
ret <4 x float> %data
@@ -107,7 +107,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_load_x:
;CHECK: buffer_load_format_x v0, s[0:3], 0
;CHECK: s_waitcnt
-define float @buffer_load_x(<4 x i32> inreg %rsrc) #0 {
+define amdgpu_ps float @buffer_load_x(<4 x i32> inreg %rsrc) {
main_body:
%data = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
ret float %data
@@ -116,15 +116,14 @@ main_body:
;CHECK-LABEL: {{^}}buffer_load_xy:
;CHECK: buffer_load_format_xy v[0:1], s[0:3], 0
;CHECK: s_waitcnt
-define <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) #0 {
+define amdgpu_ps <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
main_body:
%data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
ret <2 x float> %data
}
-declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #1
-declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
+declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #0
+declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #0
+declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readonly }
+attributes #0 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll Wed Apr 6 14:40:20 2016
@@ -5,7 +5,7 @@
;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0
;CHECK: buffer_store_format_xyzw v[4:7], s[0:3], 0 glc
;CHECK: buffer_store_format_xyzw v[8:11], s[0:3], 0 slc
-define void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) #0 {
+define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
@@ -15,7 +15,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_store_immoffs:
;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0 offset:42
-define void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) #0 {
+define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
ret void
@@ -23,7 +23,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_store_idx:
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
-define void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) #0 {
+define amdgpu_ps void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
ret void
@@ -31,7 +31,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_store_ofs:
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 offen
-define void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) #0 {
+define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 %2, i1 0, i1 0)
ret void
@@ -39,7 +39,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_store_both:
;CHECK: buffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 idxen offen
-define void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) #0 {
+define amdgpu_ps void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 %3, i1 0, i1 0)
ret void
@@ -48,7 +48,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_store_both_reversed:
;CHECK: v_mov_b32_e32 v6, v4
;CHECK: buffer_store_format_xyzw v[0:3], v[5:6], s[0:3], 0 idxen offen
-define void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) #0 {
+define amdgpu_ps void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %3, i32 %2, i1 0, i1 0)
ret void
@@ -62,7 +62,7 @@ main_body:
;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_store_format_xyzw v[0:3], v6, s[0:3], 0 idxen
-define void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) #0 {
+define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %3, i32 0, i1 0, i1 0)
@@ -70,9 +70,8 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #2
+declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #0
+declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readonly }
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll Wed Apr 6 14:40:20 2016
@@ -5,7 +5,7 @@
;SI: image_atomic_swap v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x04,0x00,0x00]
;VI: image_atomic_swap v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x04,0x00,0x00]
;CHECK: s_waitcnt vmcnt(0)
-define float @image_atomic_swap(<8 x i32> inreg, <4 x i32>, i32) #0 {
+define amdgpu_ps float @image_atomic_swap(<8 x i32> inreg, <4 x i32>, i32) {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.swap.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
%orig.f = bitcast i32 %orig to float
@@ -16,7 +16,7 @@ main_body:
;SI: image_atomic_swap v2, v[0:1], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x02,0x00,0x00]
;VI: image_atomic_swap v2, v[0:1], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x02,0x00,0x00]
;CHECK: s_waitcnt vmcnt(0)
-define float @image_atomic_swap_v2i32(<8 x i32> inreg, <2 x i32>, i32) #0 {
+define amdgpu_ps float @image_atomic_swap_v2i32(<8 x i32> inreg, <2 x i32>, i32) {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.swap.v2i32(i32 %2, <2 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
%orig.f = bitcast i32 %orig to float
@@ -27,7 +27,7 @@ main_body:
;SI: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x01,0x00,0x00]
;VI: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x01,0x00,0x00]
;CHECK: s_waitcnt vmcnt(0)
-define float @image_atomic_swap_i32(<8 x i32> inreg, i32, i32) #0 {
+define amdgpu_ps float @image_atomic_swap_i32(<8 x i32> inreg, i32, i32) {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(i32 %2, i32 %1, <8 x i32> %0, i1 0, i1 0, i1 0)
%orig.f = bitcast i32 %orig to float
@@ -39,7 +39,7 @@ main_body:
;VI: image_atomic_cmpswap v[4:5], v[0:3], s[0:7] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x44,0xf0,0x00,0x04,0x00,0x00]
;CHECK: s_waitcnt vmcnt(0)
;CHECK: v_mov_b32_e32 v0, v4
-define float @image_atomic_cmpswap(<8 x i32> inreg, <4 x i32>, i32, i32) #0 {
+define amdgpu_ps float @image_atomic_cmpswap(<8 x i32> inreg, <4 x i32>, i32, i32) {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.v4i32(i32 %2, i32 %3, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
%orig.f = bitcast i32 %orig to float
@@ -50,7 +50,7 @@ main_body:
;SI: image_atomic_add v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x44,0xf0,0x00,0x04,0x00,0x00]
;VI: image_atomic_add v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0x00,0x04,0x00,0x00]
;CHECK: s_waitcnt vmcnt(0)
-define float @image_atomic_add(<8 x i32> inreg, <4 x i32>, i32) #0 {
+define amdgpu_ps float @image_atomic_add(<8 x i32> inreg, <4 x i32>, i32) {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.add.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
%orig.f = bitcast i32 %orig to float
@@ -61,7 +61,7 @@ main_body:
;SI: image_atomic_sub v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0x00,0x04,0x00,0x00]
;VI: image_atomic_sub v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x4c,0xf0,0x00,0x04,0x00,0x00]
;CHECK: s_waitcnt vmcnt(0)
-define float @image_atomic_sub(<8 x i32> inreg, <4 x i32>, i32) #0 {
+define amdgpu_ps float @image_atomic_sub(<8 x i32> inreg, <4 x i32>, i32) {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.sub.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
%orig.f = bitcast i32 %orig to float
@@ -87,7 +87,7 @@ main_body:
;CHECK: s_waitcnt vmcnt(0)
;CHECK: image_atomic_dec v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x70,0xf0,0x00,0x04,0x00,0x00]
;CHECK: s_waitcnt vmcnt(0)
-define float @image_atomic_unchanged(<8 x i32> inreg, <4 x i32>, i32) #0 {
+define amdgpu_ps float @image_atomic_unchanged(<8 x i32> inreg, <4 x i32>, i32) {
main_body:
%t0 = call i32 @llvm.amdgcn.image.atomic.smin.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
%t1 = call i32 @llvm.amdgcn.image.atomic.umin.v4i32(i32 %t0, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
@@ -102,23 +102,22 @@ main_body:
ret float %out
}
-declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.swap.v2i32(i32, <2 x i32>, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.swap.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
-
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.v4i32(i32, i32, <4 x i32>, <8 x i32>,i1, i1, i1) #1
-
-declare i32 @llvm.amdgcn.image.atomic.add.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.sub.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.smin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.umin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.smax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.umax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.and.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.or.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.xor.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.inc.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
-declare i32 @llvm.amdgcn.image.atomic.dec.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
+declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.swap.v2i32(i32, <2 x i32>, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.swap.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
+
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.v4i32(i32, i32, <4 x i32>, <8 x i32>,i1, i1, i1) #0
+
+declare i32 @llvm.amdgcn.image.atomic.add.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.sub.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.smin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.umin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.smax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.umax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.and.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.or.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.xor.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.inc.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.dec.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind }
+attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll Wed Apr 6 14:40:20 2016
@@ -4,7 +4,7 @@
;CHECK-LABEL: {{^}}image_load_v4i32:
;CHECK: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
;CHECK: s_waitcnt vmcnt(0)
-define <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
+define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
ret <4 x float> %tex
@@ -13,7 +13,7 @@ main_body:
;CHECK-LABEL: {{^}}image_load_v2i32:
;CHECK: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
;CHECK: s_waitcnt vmcnt(0)
-define <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
+define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
ret <4 x float> %tex
@@ -22,7 +22,7 @@ main_body:
;CHECK-LABEL: {{^}}image_load_i32:
;CHECK: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
;CHECK: s_waitcnt vmcnt(0)
-define <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) #0 {
+define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
ret <4 x float> %tex
@@ -31,7 +31,7 @@ main_body:
;CHECK-LABEL: {{^}}image_load_mip:
;CHECK: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
;CHECK: s_waitcnt vmcnt(0)
-define <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
+define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
ret <4 x float> %tex
@@ -40,7 +40,7 @@ main_body:
;CHECK-LABEL: {{^}}image_load_1:
;CHECK: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
;CHECK: s_waitcnt vmcnt(0)
-define float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
+define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
%elt = extractelement <4 x float> %tex, i32 0
@@ -50,7 +50,7 @@ main_body:
;CHECK-LABEL: {{^}}image_store_v4i32:
;CHECK: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
-define void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
+define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
main_body:
call void @llvm.amdgcn.image.store.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
ret void
@@ -58,7 +58,7 @@ main_body:
;CHECK-LABEL: {{^}}image_store_v2i32:
;CHECK: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
-define void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) #0 {
+define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) {
main_body:
call void @llvm.amdgcn.image.store.v2i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
ret void
@@ -66,7 +66,7 @@ main_body:
;CHECK-LABEL: {{^}}image_store_i32:
;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
-define void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) #0 {
+define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) {
main_body:
call void @llvm.amdgcn.image.store.i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
ret void
@@ -74,7 +74,7 @@ main_body:
;CHECK-LABEL: {{^}}image_store_mip:
;CHECK: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
-define void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
+define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
main_body:
call void @llvm.amdgcn.image.store.mip.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
ret void
@@ -88,7 +88,7 @@ main_body:
;CHECK: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
;CHECK: s_waitcnt vmcnt(0)
;CHECK: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
-define void @image_store_wait(<8 x i32> inreg, <8 x i32> inreg, <8 x i32> inreg, <4 x float>, i32) #0 {
+define amdgpu_ps void @image_store_wait(<8 x i32> inreg, <8 x i32> inreg, <8 x i32> inreg, <4 x float>, i32) {
main_body:
call void @llvm.amdgcn.image.store.i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i1 0, i1 0, i1 0, i1 0)
%data = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %4, <8 x i32> %1, i32 15, i1 0, i1 0, i1 0, i1 0)
@@ -96,16 +96,15 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.image.store.i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #1
-declare void @llvm.amdgcn.image.store.v2i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
-declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
-declare void @llvm.amdgcn.image.store.mip.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
-
-declare <4 x float> @llvm.amdgcn.image.load.i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #2
-declare <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
-declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
-declare <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readonly }
+declare void @llvm.amdgcn.image.store.i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
+declare void @llvm.amdgcn.image.store.v2i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
+declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
+declare void @llvm.amdgcn.image.store.mip.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
+
+declare <4 x float> @llvm.amdgcn.image.load.i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll Wed Apr 6 14:40:20 2016
@@ -6,7 +6,7 @@
;GCN: s_mov_b32 m0, s{{[0-9]+}}
;GCN: v_interp_p1_f32
;GCN: v_interp_p2_f32
-define void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
+define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) {
main_body:
%i = extractelement <2 x i32> %4, i32 0
%j = extractelement <2 x i32> %4, i32 1
@@ -19,12 +19,11 @@ main_body:
}
; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p1(i32, i32, i32, i32) #1
+declare float @llvm.amdgcn.interp.p1(i32, i32, i32, i32) #0
; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p2(float, i32, i32, i32, i32) #1
+declare float @llvm.amdgcn.interp.p2(float, i32, i32, i32, i32) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
+attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll Wed Apr 6 14:40:20 2016
@@ -6,7 +6,7 @@
;SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
;VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]]
-define void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
+define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) {
main_body:
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #1
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
; SI-LABEL: {{^}}kilp_gs_const:
; SI: s_mov_b64 exec, 0
-define void @kilp_gs_const() #0 {
+define amdgpu_gs void @kilp_gs_const() {
main_body:
%0 = icmp ule i32 0, 3
%1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
@@ -16,6 +16,4 @@ main_body:
declare void @llvm.AMDGPU.kilp(float)
-attributes #0 = { "ShaderType"="2" }
-
!0 = !{!"const", null, i32 1}
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.cos.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.cos.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.cos.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.cos.ll Wed Apr 6 14:40:20 2016
@@ -37,5 +37,3 @@ define void @testv(<4 x float> addrspace
declare float @llvm.cos.f32(float) readnone
declare <4 x float> @llvm.cos.v4f32(<4 x float>) readnone
-
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.pow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.pow.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.pow.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.pow.ll Wed Apr 6 14:40:20 2016
@@ -5,7 +5,7 @@
;CHECK-NEXT: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}},
;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
-define void @test1(<4 x float> inreg %reg0) #0 {
+define amdgpu_ps void @test1(<4 x float> inreg %reg0) {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = extractelement <4 x float> %reg0, i32 1
%r2 = call float @llvm.pow.f32( float %r0, float %r1)
@@ -27,7 +27,7 @@ define void @test1(<4 x float> inreg %re
;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}},
;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
-define void @test2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+define amdgpu_ps void @test2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
%vec = call <4 x float> @llvm.pow.v4f32( <4 x float> %reg0, <4 x float> %reg1)
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
@@ -36,5 +36,3 @@ define void @test2(<4 x float> inreg %re
declare float @llvm.pow.f32(float ,float ) readonly
declare <4 x float> @llvm.pow.v4f32(<4 x float> ,<4 x float> ) readonly
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.sin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.sin.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.sin.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.sin.ll Wed Apr 6 14:40:20 2016
@@ -88,5 +88,3 @@ define void @sin_v4f32(<4 x float> addrs
declare float @llvm.sin.f32(float) readnone
declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone
-
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-input-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-input-fold.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-input-fold.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-input-fold.ll Wed Apr 6 14:40:20 2016
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=cayman
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 1
@@ -111,7 +111,6 @@ declare float @llvm.pow.f32(float, float
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
attributes #2 = { readonly }
attributes #3 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/m0-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/m0-spill.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/m0-spill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/m0-spill.ll Wed Apr 6 14:40:20 2016
@@ -5,7 +5,7 @@
; CHECK-LABEL: {{^}}main:
; CHECK-NOT: v_readlane_b32 m0
-define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
+define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) {
main_body:
%4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
%cmp = fcmp ueq float 0.0, %4
Modified: llvm/trunk/test/CodeGen/AMDGPU/max-literals.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/max-literals.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/max-literals.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/max-literals.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
; CHECK-LABEL: {{^}}main:
; CHECK: ADD *
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 1
@@ -32,7 +32,7 @@ main_body:
; CHECK-LABEL: {{^}}main2:
; CHECK-NOT: ADD *
-define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
+define amdgpu_vs void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 1
@@ -63,5 +63,4 @@ declare float @llvm.AMDGPU.dp4(<4 x floa
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll Wed Apr 6 14:40:20 2016
@@ -55,7 +55,7 @@ entry:
; CHECK-LABEL: {{^}}soffset_max_imm:
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
-define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
+define amdgpu_gs void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
main_body:
%tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
%tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
@@ -74,7 +74,7 @@ main_body:
; CHECK-LABEL: {{^}}soffset_no_fold:
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
-define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
+define amdgpu_gs void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
main_body:
%tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
%tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
@@ -179,5 +179,5 @@ define void @store_vgpr_ptr(i32 addrspac
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" }
+attributes #1 = { "unsafe-fp-math"="true" }
attributes #3 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll Wed Apr 6 14:40:20 2016
@@ -1,18 +1,14 @@
; RUN: not llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s 2>&1 | FileCheck %s
; CHECK: in function pixel_s{{.*}}: unsupported non-compute shaders with HSA
-define void @pixel_shader() #0 {
+define amdgpu_ps void @pixel_shader() #0 {
ret void
}
-define void @vertex_shader() #1 {
+define amdgpu_vs void @vertex_shader() #0 {
ret void
}
-define void @geometry_shader() #2 {
+define amdgpu_gs void @geometry_shader() #0 {
ret void
}
-
-attributes #0 = { nounwind "ShaderType"="0" }
-attributes #1 = { nounwind "ShaderType"="1" }
-attributes #2 = { nounwind "ShaderType"="2" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/predicate-dp4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/predicate-dp4.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/predicate-dp4.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/predicate-dp4.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
; CHECK-LABEL: {{^}}main:
; CHECK: PRED_SETE_INT * Pred,
; CHECK: DOT4 T{{[0-9]+}}.X, T0.X, T0.X, Pred_sel_one
-define void @main(<4 x float> inreg) #0 {
+define amdgpu_ps void @main(<4 x float> inreg) {
main_body:
%1 = extractelement <4 x float> %0, i32 0
%2 = bitcast float %1 to i32
@@ -24,4 +24,3 @@ ENDIF:
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #1 = { readnone }
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/pv-packing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/pv-packing.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/pv-packing.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/pv-packing.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
;CHECK: DOT4 T{{[0-9]\.X}}
;CHECK: MULADD_IEEE * T{{[0-9]\.W}}
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 1
@@ -41,5 +41,4 @@ declare float @llvm.AMDGPU.dp4(<4 x floa
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/pv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/pv.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/pv.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/pv.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
; CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
; CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 1
@@ -235,7 +235,6 @@ declare float @llvm.pow.f32(float, float
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
attributes #2 = { readonly }
attributes #3 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600-encoding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600-encoding.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600-encoding.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600-encoding.ll Wed Apr 6 14:40:20 2016
@@ -10,7 +10,7 @@
; R600: {{^}}test:
; R600: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
-define void @test(<4 x float> inreg %reg0) #0 {
+define amdgpu_ps void @test(<4 x float> inreg %reg0) {
entry:
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = extractelement <4 x float> %reg0, i32 1
@@ -21,5 +21,3 @@ entry:
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600-export-fix.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600-export-fix.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600-export-fix.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600-export-fix.ll Wed Apr 6 14:40:20 2016
@@ -10,7 +10,7 @@
;CHECK: EXPORT T{{[0-9]}}.0000
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 1
@@ -138,5 +138,3 @@ main_body:
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll Wed Apr 6 14:40:20 2016
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=cayman
-define void @main(<4 x float> inreg, <4 x float> inreg) #0 {
+define amdgpu_ps void @main(<4 x float> inreg, <4 x float> inreg) {
main_body:
%2 = extractelement <4 x float> %0, i32 0
%3 = extractelement <4 x float> %0, i32 1
@@ -54,5 +54,4 @@ declare <4 x float> @llvm.AMDGPU.tex(<4
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #0 = { "ShaderType"="0" }
attributes #1 = { readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600cfg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600cfg.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600cfg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600cfg.ll Wed Apr 6 14:40:20 2016
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 1
@@ -115,5 +115,3 @@ ENDIF48:
declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/reciprocal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/reciprocal.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/reciprocal.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/reciprocal.ll Wed Apr 6 14:40:20 2016
@@ -2,7 +2,7 @@
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test(<4 x float> inreg %reg0) #0 {
+define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = fdiv float 1.0, %r0
%vec = insertelement <4 x float> undef, float %r1, i32 0
@@ -11,5 +11,3 @@ define void @test(<4 x float> inreg %reg
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/ret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ret.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ret.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ret.ll Wed Apr 6 14:40:20 2016
@@ -1,8 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-attributes #0 = { "ShaderType"="1" }
-
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; GCN-LABEL: {{^}}vgpr:
@@ -11,7 +9,7 @@ declare void @llvm.SI.export(i32, i32, i
; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
-define {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
%x = fadd float %3, 1.0
%a = insertvalue {float, float} undef, float %x, 0
@@ -28,7 +26,7 @@ define {float, float} @vgpr([9 x <16 x i
; GCN: exp 15, 0, 1, 1, 1, v4, v4, v4, v4
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
-define {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
}
@@ -45,8 +43,8 @@ define {float, float, float, float} @vgp
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
-attributes #1 = { "ShaderType"="0" "InitialPSInputAddr"="0" }
-define {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
+attributes #0 = { "InitialPSInputAddr"="0" }
+define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
%i0 = extractelement <2 x i32> %4, i32 0
%i1 = extractelement <2 x i32> %4, i32 1
%i2 = extractelement <2 x i32> %7, i32 0
@@ -71,7 +69,7 @@ define {float, float, float, float, floa
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
-define float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
+define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
ret float 1.0
}
@@ -85,7 +83,7 @@ define float @ps_input_ena_no_inputs([9
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
-define {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
+define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
%f = bitcast <2 x i32> %8 to <2 x float>
%s = insertvalue {float, <2 x float>} undef, float %14, 0
%s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
@@ -104,8 +102,8 @@ define {float, <2 x float>} @ps_input_en
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
-attributes #2 = { "ShaderType"="0" "InitialPSInputAddr"="1" }
-define {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
+attributes #1 = { "InitialPSInputAddr"="1" }
+define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
%i0 = extractelement <2 x i32> %4, i32 0
%i1 = extractelement <2 x i32> %4, i32 1
%i2 = extractelement <2 x i32> %7, i32 0
@@ -134,8 +132,8 @@ define {float, float, float, float, floa
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
-attributes #3 = { "ShaderType"="0" "InitialPSInputAddr"="119" }
-define {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
+attributes #2 = { "InitialPSInputAddr"="119" }
+define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
%i0 = extractelement <2 x i32> %4, i32 0
%i1 = extractelement <2 x i32> %4, i32 1
%i2 = extractelement <2 x i32> %7, i32 0
@@ -164,8 +162,8 @@ define {float, float, float, float, floa
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
-attributes #4 = { "ShaderType"="0" "InitialPSInputAddr"="418" }
-define {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #4 {
+attributes #3 = { "InitialPSInputAddr"="418" }
+define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
%i0 = extractelement <2 x i32> %4, i32 0
%i1 = extractelement <2 x i32> %4, i32 1
%i2 = extractelement <2 x i32> %7, i32 0
@@ -187,7 +185,7 @@ define {float, float, float, float, floa
; GCN: s_add_i32 s0, s3, 2
; GCN: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
-define {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
%x = add i32 %2, 2
%a = insertvalue {i32, i32, i32} undef, i32 %x, 0
%b = insertvalue {i32, i32, i32} %a, i32 %1, 1
@@ -203,7 +201,7 @@ define {i32, i32, i32} @sgpr([9 x <16 x
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
-define {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
%x = add i32 %2, 2
ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
}
@@ -218,7 +216,7 @@ define {i32, i32, i32, i32} @sgpr_litera
; GCN: s_mov_b32 s2, s3
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
-define {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
%v = fadd float %3, 1.0
%s = add i32 %2, 2
@@ -239,7 +237,7 @@ define {float, i32, float, i32, i32} @bo
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: exp 15, 0, 1, 1, 1, v3, v3, v3, v3
-define {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/ret_jump.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ret_jump.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ret_jump.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ret_jump.ll Wed Apr 6 14:40:20 2016
@@ -12,7 +12,7 @@ target triple = "amdgcn--"
; ModuleID = 'bugpoint-reduced-simplified.bc'
target triple = "amdgcn--"
-define <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, i32 addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, i32 addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
%p83 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%p87 = fmul float undef, %p83
@@ -53,5 +53,5 @@ declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
-attributes #0 = { "InitialPSInputAddr"="36983" "ShaderType"="0" }
+attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/rv7x0_count3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rv7x0_count3.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rv7x0_count3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rv7x0_count3.ll Wed Apr 6 14:40:20 2016
@@ -2,7 +2,7 @@
; CHECK: TEX 9 @6 ; encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
-define void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+define amdgpu_vs void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
%1 = extractelement <4 x float> %reg1, i32 0
%2 = extractelement <4 x float> %reg1, i32 1
%3 = extractelement <4 x float> %reg1, i32 2
@@ -37,5 +37,3 @@ define void @test(<4 x float> inreg %reg
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll Wed Apr 6 14:40:20 2016
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
;REQUIRES: asserts
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #1 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 1
@@ -79,4 +79,3 @@ declare float @llvm.AMDGPU.clamp.f32(flo
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { readnone }
-attributes #1 = { "ShaderType"="1" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll Wed Apr 6 14:40:20 2016
@@ -7,7 +7,7 @@ declare void @llvm.AMDGPU.barrier.local(
; SI-LABEL: {{^}}main(
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 2
@@ -159,5 +159,3 @@ ENDIF19:
%115 = fadd float %temp4.0, 1.000000e+00
br label %Flow1
}
-
-attributes #0 = { "ShaderType"="1" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll Wed Apr 6 14:40:20 2016
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
;REQUIRES: asserts
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 1
@@ -128,5 +128,3 @@ ENDIF19:
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll Wed Apr 6 14:40:20 2016
@@ -11,7 +11,7 @@ declare <4 x float> @llvm.SI.image.sampl
; CHECK-LABEL: {{^}}phi1:
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
-define void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -35,7 +35,7 @@ ENDIF:
; Make sure this program doesn't crash
; CHECK-LABEL: {{^}}phi2:
-define void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -156,7 +156,7 @@ ENDIF24:
; We just want ot make sure the program doesn't crash
; CHECK-LABEL: {{^}}loop:
-define void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -227,7 +227,7 @@ declare i32 @llvm.SI.packf16(float, floa
; CHECK: image_sample
; CHECK: exp
; CHECK: s_endpgm
-define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -291,7 +291,7 @@ endif:
; This test is just checking that we don't crash / assertion fail.
; CHECK-LABEL: {{^}}copy2:
; CHECK: s_endpgm
-define void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
br label %LOOP68
@@ -321,7 +321,7 @@ ENDIF69:
; CHECK: image_sample
; CHECK: image_sample
; CHECK: s_endpgm
-define void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
+define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
%tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !2
@@ -365,7 +365,7 @@ bb71:
; Check the the resource descriptor is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_srsrc_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
-define void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
+define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
%tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
@@ -380,7 +380,7 @@ define void @mimg_srsrc_sgpr([34 x <8 x
; Check the the sampler is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_ssamp_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
-define void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #0 {
+define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #0 {
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
%tmp8 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp7, align 16, !tbaa !0
@@ -394,7 +394,7 @@ define void @mimg_ssamp_sgpr([17 x <4 x
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" }
+attributes #0 = { "unsafe-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readonly }
attributes #3 = { readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/shared-op-cycle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shared-op-cycle.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shared-op-cycle.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shared-op-cycle.ll Wed Apr 6 14:40:20 2016
@@ -4,7 +4,7 @@
; CHECK: MULADD_IEEE *
; CHECK-NOT: MULADD_IEEE *
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) {
%w0 = extractelement <4 x float> %reg0, i32 3
%w1 = extractelement <4 x float> %reg1, i32 3
%w2 = extractelement <4 x float> %reg2, i32 3
@@ -28,5 +28,4 @@ declare float @llvm.AMDGPU.dp4(<4 x floa
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #0 = { "ShaderType"="1" }
-attributes #1 = { readnone }
\ No newline at end of file
+attributes #1 = { readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-literal-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-literal-folding.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-literal-folding.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-literal-folding.ll Wed Apr 6 14:40:20 2016
@@ -4,7 +4,7 @@
; CHECK-LABEL: {{^}}main:
; CHECK-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0xbf4353f8
-define void @main(float) #0 {
+define amdgpu_vs void @main(float) {
main_body:
%1 = fmul float %0, 0x3FE86A7F00000000
%2 = fmul float %0, 0xBFE86A7F00000000
@@ -13,5 +13,3 @@ main_body:
}
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="1" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll Wed Apr 6 14:40:20 2016
@@ -6,7 +6,7 @@
; CHECK: {{^}}main:
; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
-define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -45,7 +45,7 @@ declare <4 x float> @llvm.SI.image.sampl
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
+
attributes #1 = { nounwind readnone }
!0 = !{!1, !1, i64 0, i32 1}
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll Wed Apr 6 14:40:20 2016
@@ -16,7 +16,7 @@
; CHECK: s_waitcnt vmcnt(0)
; CHECK: exp
; CHECK: s_endpgm
-define void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
+define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
main_body:
%tmp = bitcast [34 x <8 x i32>] addrspace(2)* %arg3 to <32 x i8> addrspace(2)*
%tmp22 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp, align 32, !tbaa !0
@@ -54,7 +54,6 @@ declare i32 @llvm.SI.packf16(float, floa
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!1, !1, i64 0, i32 1}
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll Wed Apr 6 14:40:20 2016
@@ -22,7 +22,7 @@
; Writing to M0 from an SMRD instruction will hang the GPU.
; CHECK-NOT: s_buffer_load_dword m0
; CHECK: s_endpgm
-define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
main_body:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -668,7 +668,7 @@ ENDIF66:
; CHECK-LABEL: {{^}}main1:
; CHECK: s_endpgm
-define void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
main_body:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -1610,7 +1610,6 @@ declare i32 @llvm.SI.packf16(float, floa
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
attributes #1 = { readnone }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll Wed Apr 6 14:40:20 2016
@@ -6,7 +6,7 @@
; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
; SI-NOT: v_readlane_b32 [[SAVED]]
-define void @main() #1 {
+define amdgpu_ps void @main() {
main_body:
%0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
%1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
@@ -510,5 +510,5 @@ declare float @llvm.maxnum.f32(float, fl
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { alwaysinline nounwind readnone }
-attributes #1 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
+attributes #1 = { "enable-no-nans-fp-math"="true" }
attributes #2 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll Wed Apr 6 14:40:20 2016
@@ -2,7 +2,7 @@
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-declare void @llvm.amdgcn.s.barrier() #2
+declare void @llvm.amdgcn.s.barrier() #1
@stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4
@@ -61,7 +61,7 @@ define void @no_reorder_barrier_local_lo
%tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
store i32 99, i32 addrspace(1)* %gptr, align 4
- call void @llvm.amdgcn.s.barrier() #2
+ call void @llvm.amdgcn.s.barrier() #1
%tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -214,7 +214,7 @@ define void @reorder_global_offsets(i32
; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x4
; XCI: TBUFFER_STORE_FORMAT
; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x8
-; define void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #1 {
+; define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #0 {
; %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
; %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
@@ -236,5 +236,4 @@ define void @reorder_global_offsets(i32
; }
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { "ShaderType"="1" nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #2 = { nounwind convergent }
+attributes #1 = { nounwind convergent }
Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd.ll Wed Apr 6 14:40:20 2016
@@ -88,7 +88,7 @@ entry:
; GCN-LABEL: {{^}}smrd_load_const0:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
-define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
main_body:
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
@@ -102,7 +102,7 @@ main_body:
; GCN-LABEL: {{^}}smrd_load_const1:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
-define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
main_body:
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
@@ -118,7 +118,7 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
-define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
main_body:
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
@@ -133,7 +133,7 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
-define void @smrd_load_const3(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
main_body:
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
@@ -148,7 +148,7 @@ main_body:
; SIVI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
-define void @smrd_load_const4(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
main_body:
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
@@ -158,9 +158,8 @@ main_body:
}
; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare float @llvm.SI.load.const(<16 x i8>, i32) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
+attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll Wed Apr 6 14:40:20 2016
@@ -6,7 +6,7 @@
; CHECK-LABEL: {{^}}split_smrd_add_worklist:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
-define void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
+define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
bb:
%tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
%tmp1 = bitcast float %tmp to i32
@@ -38,7 +38,7 @@ declare <4 x float> @llvm.SI.image.sampl
declare i32 @llvm.SI.packf16(float, float) #1
-attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" }
+attributes #0 = { "unsafe-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!1, !1, i64 0, i32 1}
Modified: llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll Wed Apr 6 14:40:20 2016
@@ -44,7 +44,7 @@ for.inc.1:
; SI-LABEL: {{^}}foo:
; SI: s_endpgm
-define void @foo() #0 {
+define amdgpu_ps void @foo() #0 {
bb:
br i1 undef, label %bb2, label %bb1
@@ -105,5 +105,5 @@ declare i32 @llvm.SI.packf16(float, floa
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+attributes #0 = { "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/swizzle-export.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/swizzle-export.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/swizzle-export.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/swizzle-export.ll Wed Apr 6 14:40:20 2016
@@ -6,7 +6,7 @@
;EG: EXPORT T{{[0-9]+}}.XXWX
;EG: EXPORT T{{[0-9]+}}.XXXW
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 1
@@ -96,7 +96,7 @@ main_body:
; EG: T{{[0-9]+}}.XY__
; EG: T{{[0-9]+}}.ZXY0
-define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+define amdgpu_vs void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
%1 = extractelement <4 x float> %reg1, i32 1
@@ -125,5 +125,4 @@ declare float @llvm.cos.f32(float) #1
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/tex-clause-antidep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/tex-clause-antidep.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/tex-clause-antidep.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/tex-clause-antidep.ll Wed Apr 6 14:40:20 2016
@@ -3,7 +3,7 @@
;CHECK: TEX
;CHECK-NEXT: ALU
-define void @test(<4 x float> inreg %reg0) #0 {
+define amdgpu_vs void @test(<4 x float> inreg %reg0) {
%1 = extractelement <4 x float> %reg0, i32 0
%2 = extractelement <4 x float> %reg0, i32 1
%3 = extractelement <4 x float> %reg0, i32 2
@@ -21,5 +21,3 @@ define void @test(<4 x float> inreg %reg
declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/texture-input-merge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/texture-input-merge.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/texture-input-merge.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/texture-input-merge.ll Wed Apr 6 14:40:20 2016
@@ -2,7 +2,7 @@
;CHECK-NOT: MOV
-define void @test(<4 x float> inreg %reg0) #0 {
+define amdgpu_vs void @test(<4 x float> inreg %reg0) {
%1 = extractelement <4 x float> %reg0, i32 0
%2 = extractelement <4 x float> %reg0, i32 1
%3 = extractelement <4 x float> %reg0, i32 2
@@ -27,5 +27,3 @@ define void @test(<4 x float> inreg %reg
declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll Wed Apr 6 14:40:20 2016
@@ -5,7 +5,7 @@
;CHECK-LABEL: {{^}}test1:
;CHECK: s_cbranch_execz
;CHECK: %loop_body
-define void @test1(<8 x i32> inreg %rsrc, <2 x i32> %addr.base, i32 %y, i32 %p) #0 {
+define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <2 x i32> %addr.base, i32 %y, i32 %p) {
main_body:
%cc = icmp eq i32 %p, 0
br i1 %cc, label %out, label %loop_body
@@ -58,5 +58,4 @@ done1:
declare i32 @llvm.amdgcn.workitem.id.x() #1
-attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll Wed Apr 6 14:40:20 2016
@@ -24,7 +24,7 @@
; GCN: NumVgprs: 256
; GCN: ScratchSize: 1024
-define void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
+define amdgpu_vs void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) {
bb:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i64 0, i64 0
%tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
@@ -491,7 +491,7 @@ declare void @llvm.SI.export(i32, i32, i
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
+attributes #0 = { "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!1, !1, i64 0, i32 1}
Modified: llvm/trunk/test/CodeGen/AMDGPU/wait.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wait.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wait.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wait.ll Wed Apr 6 14:40:20 2016
@@ -11,7 +11,7 @@
; DEFAULT: exp
; DEFAULT: s_waitcnt lgkmcnt(0)
; DEFAULT: s_endpgm
-define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
+define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
%tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -45,8 +45,8 @@ main_body:
; ILPMAX: s_waitcnt vmcnt(0)
; ILPMAX: s_endpgm
-define void @main2([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)*
-byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
+define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)*
+byval, i32 inreg, i32 inreg, i32, i32, i32, i32) {
main_body:
%11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
@@ -78,7 +78,6 @@ declare <4 x float> @llvm.SI.vs.load.inp
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="1" }
attributes #1 = { convergent nounwind }
attributes #2 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=265589&r1=265588&r2=265589&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll Wed Apr 6 14:40:20 2016
@@ -5,7 +5,7 @@
;
;CHECK-LABEL: {{^}}test1:
;CHECK-NOT: s_wqm
-define <4 x float> @test1(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
+define amdgpu_ps <4 x float> @test1(<8 x i32> inreg %rsrc, <4 x i32> %c) {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.v4i32(<4 x float> %tex, <4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
@@ -20,7 +20,7 @@ main_body:
;CHECK: image_sample
;CHECK-NOT: exec
;CHECK: _load_dword v0,
-define float @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) #0 {
+define amdgpu_ps float @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
main_body:
%c.1 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%c.2 = bitcast <4 x float> %c.1 to <4 x i32>
@@ -40,7 +40,7 @@ main_body:
;CHECK: s_and_b64 exec, exec, [[ORIG]]
;CHECK: store
;CHECK-NOT: exec
-define <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) #0 {
+define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
main_body:
%tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tex.1 = bitcast <4 x float> %tex to <4 x i32>
@@ -62,7 +62,7 @@ main_body:
;CHECK: store
;CHECK: s_wqm_b64 exec, exec
;CHECK: image_sample v[0:3], [[MUL]], s[0:7], s[8:11] dmask:0xf
-define <4 x float> @test4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %d, float %data) #0 {
+define amdgpu_ps <4 x float> @test4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %d, float %data) {
main_body:
%c.1 = mul i32 %c, %d
%gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.1
@@ -88,7 +88,7 @@ main_body:
;CHECK: s_mov_b64 exec, [[SAVED]]
;CHECK: %IF
;CHECK: image_sample
-define float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %z, float %data) #0 {
+define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %z, float %data) {
main_body:
%cmp = icmp eq i32 %z, 0
br i1 %cmp, label %IF, label %ELSE
@@ -124,7 +124,7 @@ END:
;CHECK-NEXT: %ELSE
;CHECK: store
;CHECK: %END
-define float @test_control_flow_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %z, float %data) #0 {
+define amdgpu_ps float @test_control_flow_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %z, float %data) {
main_body:
%cmp = icmp eq i32 %z, 0
br i1 %cmp, label %ELSE, label %IF
@@ -158,7 +158,7 @@ END:
;CHECK: store
;CHECK: s_wqm_b64 exec, exec
;CHECK: v_cmp
-define <4 x float> @test_control_flow_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <3 x i32> %idx, <2 x float> %data, i32 %coord) #0 {
+define amdgpu_ps <4 x float> @test_control_flow_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <3 x i32> %idx, <2 x float> %data, i32 %coord) {
main_body:
%idx.1 = extractelement <3 x i32> %idx, i32 0
%gep.1 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.1
@@ -205,7 +205,7 @@ END:
;CHECK: load
;CHECK: store
;CHECK: v_cmp
-define float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <3 x i32> %idx, <2 x float> %data, i32 %coord) #0 {
+define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <3 x i32> %idx, <2 x float> %data, i32 %coord) {
main_body:
%tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tex.1 = extractelement <4 x float> %tex, i32 0
@@ -253,7 +253,7 @@ END:
;CHECK: s_mov_b64 exec, [[SAVE]]
;CHECK: %END
;CHECK: image_sample
-define <4 x float> @test_control_flow_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %coord, i32 %y, float %z) #0 {
+define amdgpu_ps <4 x float> @test_control_flow_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %coord, i32 %y, float %z) {
main_body:
%cond = icmp eq i32 %y, 0
br i1 %cond, label %IF, label %END
@@ -286,7 +286,7 @@ END:
;VI: flat_store_dword
;CHECK: s_mov_b64 exec, [[SAVE]]
;CHECK: image_sample
-define <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, i32 %coord, i32 %coord2, float %z) #0 {
+define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, i32 %coord, i32 %coord2, float %z) {
main_body:
%tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -320,7 +320,7 @@ main_body:
;VI: flat_store_dword
;CHECK-NOT: wqm
;CHECK: v_cmpx_
-define <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %idx, float %data, i32 %coord, i32 %coord2, float %z) #0 {
+define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %idx, float %data, i32 %coord, i32 %coord2, float %z) {
main_body:
%tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -342,7 +342,6 @@ declare <4 x float> @llvm.SI.image.sampl
declare void @llvm.AMDGPU.kill(float)
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }
attributes #3 = { nounwind readnone }
More information about the llvm-commits
mailing list