[llvm] r290608 - [AMDGPU][llvm-mc] Predefined symbols to access register counts (.kernel.{v|s}gpr_count)
Artem Tamazov via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 27 08:00:12 PST 2016
Author: artem.tamazov
Date: Tue Dec 27 10:00:11 2016
New Revision: 290608
URL: http://llvm.org/viewvc/llvm-project?rev=290608&view=rev
Log:
[AMDGPU][llvm-mc] Predefined symbols to access register counts (.kernel.{v|s}gpr_count)
The feature allows for conditional assembly, filling the entries
of .amd_kernel_code_t etc.
Symbols are defined with value 0 at the beginning of each kernel scope.
After each register usage, the respective symbol is set to:
value = max( value, ( register index + 1 ) )
Thus, at the end of scope the value represents a count of used registers.
A kernel scope begins at a .amdgpu_hsa_kernel directive and ends at the
next .amdgpu_hsa_kernel (or at EOF, whichever comes first). There is also
a dummy scope that spans from the beginning of the source file until the
first .amdgpu_hsa_kernel.
Test added.
Differential Revision: https://reviews.llvm.org/D27859
Added:
llvm/trunk/test/MC/AMDGPU/sym_kernel_scope.s
llvm/trunk/test/MC/AMDGPU/sym_option.s
- copied, changed from r290607, llvm/trunk/test/MC/AMDGPU/symbol_special.s
Removed:
llvm/trunk/test/MC/AMDGPU/symbol_special.s
Modified:
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=290608&r1=290607&r2=290608&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Tue Dec 27 10:00:11 2016
@@ -661,6 +661,49 @@ raw_ostream &operator <<(raw_ostream &OS
// AsmParser
//===----------------------------------------------------------------------===//
+// Tracks register usage for the current kernel scope (the lowest unused SGPR
+// and VGPR index) and publishes it via the .kernel.sgpr_count and
+// .kernel.vgpr_count symbols. A scope begins at .amdgpu_hsa_kernel and ends at the next one or at EOF.
+class KernelScopeInfo {
+ int SgprIndexUnusedMin; // Lowest SGPR index not yet used in this scope; -1 until initialize().
+ int VgprIndexUnusedMin; // Lowest VGPR index not yet used in this scope; -1 until initialize().
+ MCContext *Ctx; // Null until initialize(); the symbols are only updated once it is set.
+
+ void usesSgprAt(int i) { // Records that the SGPR at dword index i is in use.
+ if (i >= SgprIndexUnusedMin) { // Only act when i raises the current maximum.
+ SgprIndexUnusedMin = ++i; // The new count is index + 1.
+ if (Ctx) {
+ MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
+ Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
+ }
+ }
+ }
+ void usesVgprAt(int i) { // Records that the VGPR at dword index i is in use.
+ if (i >= VgprIndexUnusedMin) { // Only act when i raises the current maximum.
+ VgprIndexUnusedMin = ++i; // The new count is index + 1.
+ if (Ctx) {
+ MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
+ Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
+ }
+ }
+ }
+public:
+ KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1), Ctx(nullptr)
+ {}
+ void initialize(MCContext &Context) { // Starts a new scope: both counts and both symbols become 0.
+ Ctx = &Context;
+ usesSgprAt(SgprIndexUnusedMin = -1); // Reset to -1; usesSgprAt(-1) then sets the count and symbol to 0.
+ usesVgprAt(VgprIndexUnusedMin = -1); // Same reset-then-publish step for the VGPR count.
+ }
+ void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
+ switch (RegKind) {
+ case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; // Pass the index of the operand's last dword.
+ case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; // Pass the index of the operand's last dword.
+ default: break; // Other register kinds do not affect either count.
+ }
+ }
+};
+
class AMDGPUAsmParser : public MCTargetAsmParser {
const MCInstrInfo &MII;
MCAsmParser &Parser;
@@ -668,6 +711,7 @@ class AMDGPUAsmParser : public MCTargetA
unsigned ForcedEncodingSize;
bool ForcedDPP;
bool ForcedSDWA;
+ KernelScopeInfo KernelScope;
/// @name Auto-generated Match Functions
/// {
@@ -693,7 +737,7 @@ private:
bool ParseSectionDirectiveHSADataGlobalProgram();
bool ParseSectionDirectiveHSARodataReadonlyAgent();
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum);
- bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth);
+ bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex);
void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn);
public:
@@ -731,6 +775,7 @@ public:
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx));
}
+ KernelScope.initialize(getContext());
}
bool isSI() const {
@@ -1240,8 +1285,9 @@ bool AMDGPUAsmParser::AddNextRegisterToL
}
}
-bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth)
+bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex)
{
+ if (DwordRegIndex) { *DwordRegIndex = 0; }
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
if (getLexer().is(AsmToken::Identifier)) {
StringRef RegName = Parser.getTok().getString();
@@ -1301,7 +1347,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegiste
} else if (getLexer().is(AsmToken::LBrac)) {
// List of consecutive registers: [s0,s1,s2,s3]
Parser.Lex();
- if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
+ if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
return false;
if (RegWidth != 1)
return false;
@@ -1313,7 +1359,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegiste
} else if (getLexer().is(AsmToken::RBrac)) {
Parser.Lex();
break;
- } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1)) {
+ } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
if (RegWidth1 != 1) {
return false;
}
@@ -1341,11 +1387,12 @@ bool AMDGPUAsmParser::ParseAMDGPURegiste
{
unsigned Size = 1;
if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
- // SGPR and TTMP registers must be are aligned. Max required alignment is 4 dwords.
+ // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
Size = std::min(RegWidth, 4u);
}
if (RegNum % Size != 0)
return false;
+ if (DwordRegIndex) { *DwordRegIndex = RegNum; }
RegNum = RegNum / Size;
int RCID = getRegClass(RegKind, RegWidth);
if (RCID == -1)
@@ -1371,11 +1418,12 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsm
SMLoc StartLoc = Tok.getLoc();
SMLoc EndLoc = Tok.getEndLoc();
RegisterKind RegKind;
- unsigned Reg, RegNum, RegWidth;
+ unsigned Reg, RegNum, RegWidth, DwordRegIndex;
- if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
+ if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
return nullptr;
}
+ KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}
@@ -1842,6 +1890,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDG
getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
ELF::STT_AMDGPU_HSA_KERNEL);
Lex();
+ KernelScope.initialize(getContext());
return false;
}
Added: llvm/trunk/test/MC/AMDGPU/sym_kernel_scope.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/sym_kernel_scope.s?rev=290608&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/sym_kernel_scope.s (added)
+++ llvm/trunk/test/MC/AMDGPU/sym_kernel_scope.s Tue Dec 27 10:00:11 2016
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
+
+.byte .kernel.sgpr_count
+// CHECK: .byte 0
+.byte .kernel.vgpr_count
+// CHECK: .byte 0
+ v_mov_b32_e32 v5, s8
+ s_endpgm
+.byte .kernel.sgpr_count
+// CHECK: .byte 9
+.byte .kernel.vgpr_count
+// CHECK: .byte 6
+
+.amdgpu_hsa_kernel K1
+K1:
+.byte .kernel.sgpr_count
+// CHECK: .byte 0
+.byte .kernel.vgpr_count
+// CHECK: .byte 0
+ v_mov_b32_e32 v1, s86
+ s_endpgm
+.byte .kernel.sgpr_count
+// CHECK: .byte 87
+.byte .kernel.vgpr_count
+// CHECK: .byte 2
+
+.amdgpu_hsa_kernel K2
+.byte .kernel.sgpr_count
+// CHECK: .byte 0
+.byte .kernel.vgpr_count
+// CHECK: .byte 0
+K2:
+ s_load_dwordx8 s[16:23], s[0:1], 0x0
+ v_mov_b32_e32 v0, v0
+ s_endpgm
+.byte .kernel.sgpr_count
+// CHECK: .byte 24
+.byte .kernel.vgpr_count
+// CHECK: .byte 1
+
+.text
+.amdgpu_hsa_kernel K3
+K3:
+A = .kernel.vgpr_count
+ v_mov_b32_e32 v[A], s0
+B = .kernel.vgpr_count
+ v_mov_b32_e32 v[B], s0
+ v_mov_b32_e32 v[B], v[A]
+C = .kernel.vgpr_count
+ v_mov_b32_e32 v[C], v[A]
+D = .kernel.sgpr_count + 3 // align
+E = D + 4
+ s_load_dwordx4 s[D:D+3], s[E:E+1], 0x0
+ s_endpgm
+
+.byte .kernel.sgpr_count
+// CHECK: .byte 10
+.byte .kernel.vgpr_count
+// CHECK: .byte 3
Copied: llvm/trunk/test/MC/AMDGPU/sym_option.s (from r290607, llvm/trunk/test/MC/AMDGPU/symbol_special.s)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/sym_option.s?p2=llvm/trunk/test/MC/AMDGPU/sym_option.s&p1=llvm/trunk/test/MC/AMDGPU/symbol_special.s&r1=290607&r2=290608&rev=290608&view=diff
==============================================================================
(empty)
Removed: llvm/trunk/test/MC/AMDGPU/symbol_special.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/symbol_special.s?rev=290607&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/symbol_special.s (original)
+++ llvm/trunk/test/MC/AMDGPU/symbol_special.s (removed)
@@ -1,46 +0,0 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=SI %s | FileCheck %s --check-prefix=SI
-// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire %s | FileCheck %s --check-prefix=BONAIRE
-// RUN: llvm-mc -arch=amdgcn -mcpu=hawaii %s | FileCheck %s --check-prefix=HAWAII
-// RUN: llvm-mc -arch=amdgcn -mcpu=kabini %s | FileCheck %s --check-prefix=KABINI
-// RUN: llvm-mc -arch=amdgcn -mcpu=iceland %s | FileCheck %s --check-prefix=ICELAND
-// RUN: llvm-mc -arch=amdgcn -mcpu=carrizo %s | FileCheck %s --check-prefix=CARRIZO
-// RUN: llvm-mc -arch=amdgcn -mcpu=tonga %s | FileCheck %s --check-prefix=TONGA
-// RUN: llvm-mc -arch=amdgcn -mcpu=fiji %s | FileCheck %s --check-prefix=FIJI
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx804 %s | FileCheck %s --check-prefix=GFX804
-// RUN: llvm-mc -arch=amdgcn -mcpu=stoney %s | FileCheck %s --check-prefix=STONEY
-
-.byte .option.machine_version_major
-// SI: .byte 0
-// BONAIRE: .byte 7
-// HAWAII: .byte 7
-// KABINI: .byte 7
-// ICELAND: .byte 8
-// CARRIZO: .byte 8
-// TONGA: .byte 8
-// FIJI: .byte 8
-// GFX804: .byte 8
-// STONEY: .byte 8
-
-.byte .option.machine_version_minor
-// SI: .byte 0
-// BONAIRE: .byte 0
-// HAWAII: .byte 0
-// KABINI: .byte 0
-// ICELAND: .byte 0
-// CARRIZO: .byte 0
-// TONGA: .byte 0
-// FIJI: .byte 0
-// GFX804: .byte 0
-// STONEY: .byte 1
-
-.byte .option.machine_version_stepping
-// SI: .byte 0
-// BONAIRE: .byte 0
-// HAWAII: .byte 1
-// KABINI: .byte 2
-// ICELAND: .byte 0
-// CARRIZO: .byte 1
-// TONGA: .byte 2
-// FIJI: .byte 3
-// GFX804: .byte 4
-// STONEY: .byte 0
More information about the llvm-commits
mailing list