[clang] [llvm] [AMDGPU] Adding the amdgpu-num-work-groups function attribute (PR #75647)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 15 12:01:23 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jun Wang (jwanggit86)
<details>
<summary>Changes</summary>
A new function attribute named amdgpu-num-work-groups is added. This attribute allows programmers to let the compiler know the number of workgroups to be launched and do optimizations based on that information.
---
Full diff: https://github.com/llvm/llvm-project/pull/75647.diff
13 Files Affected:
- (modified) clang/include/clang/Basic/Attr.td (+7)
- (modified) clang/include/clang/Basic/AttrDocs.td (+23)
- (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+7)
- (modified) clang/lib/Sema/SemaDeclAttr.cpp (+13)
- (modified) clang/test/Misc/pragma-attribute-supported-attributes-list.test (+1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (+6)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h (+3)
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (+1)
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (+9)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+15)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+8)
- (added) llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll (+82)
``````````diff
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 5943583d92773a..605fcbbff027b9 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2011,6 +2011,13 @@ def AMDGPUNumVGPR : InheritableAttr {
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
}
+def AMDGPUNumWorkGroups : InheritableAttr {
+ let Spellings = [Clang<"amdgpu_num_work_groups", 0>];
+ let Args = [UnsignedArgument<"NumWorkGroups">];
+ let Documentation = [AMDGPUNumWorkGroupsDocs];
+ let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
+}
+
def AMDGPUKernelCall : DeclOrTypeAttr {
let Spellings = [Clang<"amdgpu_kernel">];
let Documentation = [Undocumented];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 77950ab6d877ea..0bf3ccf367284c 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2693,6 +2693,29 @@ An error will be given if:
}];
}
+def AMDGPUNumWorkGroupsDocs : Documentation {
+ let Category = DocCatAMDGPUAttributes;
+ let Content = [{
+The number of work groups specifies the number of work groups when the kernel
+is dispatched.
+
+Clang supports the
+``__attribute__((amdgpu_num_work_groups(<num>)))`` attribute for the
+AMDGPU target. This attribute may be attached to a kernel function definition
+and is an optimization hint.
+
+``<num>`` parameter specifies the number of work groups.
+
+If specified, the AMDGPU target backend might be able to produce better machine
+code.
+
+An error will be given if:
+ - Specified values violate subtarget specifications;
+ - Specified values are not compatible with values provided through other
+ attributes.
+ }];
+}
+
def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
let Content = [{
Clang supports several different calling conventions, depending on the target
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 03ac6b78598fc8..11a0835f37f4a9 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -356,6 +356,13 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (const auto *Attr = FD->getAttr<AMDGPUNumWorkGroupsAttr>()) {
+ uint32_t NumWG = Attr->getNumWorkGroups();
+
+ if (NumWG != 0)
+ F->addFnAttr("amdgpu-num-work-groups", llvm::utostr(NumWG));
+ }
}
/// Emits control constants used to change per-architecture behaviour in the
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 5b29b05dee54b3..3737dd256aff02 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -8051,6 +8051,16 @@ static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(::new (S.Context) AMDGPUNumVGPRAttr(S.Context, AL, NumVGPR));
}
+static void handleAMDGPUNumWorkGroupsAttr(Sema &S, Decl *D,
+ const ParsedAttr &AL) {
+ uint32_t NumWG = 0;
+ Expr *NumWGExpr = AL.getArgAsExpr(0);
+ if (!checkUInt32Argument(S, AL, NumWGExpr, NumWG))
+ return;
+
+ D->addAttr(::new (S.Context) AMDGPUNumWorkGroupsAttr(S.Context, AL, NumWG));
+}
+
static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
const ParsedAttr &AL) {
// If we try to apply it to a function pointer, don't warn, but don't
@@ -9058,6 +9068,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_AMDGPUNumVGPR:
handleAMDGPUNumVGPRAttr(S, D, AL);
break;
+ case ParsedAttr::AT_AMDGPUNumWorkGroups:
+ handleAMDGPUNumWorkGroupsAttr(S, D, AL);
+ break;
case ParsedAttr::AT_AVRSignal:
handleAVRSignalAttr(S, D, AL);
break;
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index bdfda430eea86c..d42bb52cc8bcfa 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -6,6 +6,7 @@
// CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function)
// CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function)
// CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function)
+// CHECK-NEXT: AMDGPUNumWorkGroups (SubjectMatchRule_function)
// CHECK-NEXT: AMDGPUWavesPerEU (SubjectMatchRule_function)
// CHECK-NEXT: AVRSignal (SubjectMatchRule_function)
// CHECK-NEXT: AbiTag (SubjectMatchRule_record_not_is_union, SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_namespace)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index b51a876750b58b..b9ede45e174a7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -494,6 +494,10 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
Kern[".max_flat_workgroup_size"] =
Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
+ unsigned NumWG = MFI.getNumWorkGroups();
+ if (NumWG != 0) {
+ Kern[".num_work_groups"] = Kern.getDocument()->getNode(NumWG);
+ }
Kern[".sgpr_spill_count"] =
Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
Kern[".vgpr_spill_count"] =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index f19c5766856408..d7f5c456706ecd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -1108,3 +1108,9 @@ void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() {
return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs;
}
+
+unsigned AMDGPUSubtarget::getNumWorkGroups(const Function &F) const {
+ const unsigned Default = 0;
+ return AMDGPU::getUnsignedIntegerAttribute(F, "amdgpu-num-work-groups", Default);
+}
+
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index b72697973be7a1..b791399c38dff8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -288,6 +288,9 @@ class AMDGPUSubtarget {
/// 2) dimension.
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
+ /// Return the number of work groups for the function.
+ unsigned getNumWorkGroups(const Function &F) const;
+
/// Return true if only a single workitem can be active in a wave.
bool isSingleLaneExecution(const Function &Kernel) const;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 48c341917ddec7..2f483e18544a78 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -46,6 +46,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
WavesPerEU = ST.getWavesPerEU(F);
+ NumWorkGroups = ST.getNumWorkGroups(F);
Occupancy = ST.computeOccupancy(F, getLDSSize());
CallingConv::ID CC = F.getCallingConv();
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7ff50c80081d30..fc244552f40da8 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -426,6 +426,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;
+ // Default/requested number of work groups for the function.
+ unsigned NumWorkGroups = 0;
+
private:
unsigned NumUserSGPRs = 0;
unsigned NumSystemSGPRs = 0;
@@ -1094,6 +1097,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// \returns true if a function needs or may need AGPRs.
bool usesAGPRs(const MachineFunction &MF) const;
+
+ /// \returns Default/requested number of work groups for this function.
+ unsigned getNumWorkGroups() const {
+ return NumWorkGroups;
+ }
+
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 4edd7960bd8c40..82e3bca7ab73b1 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1221,6 +1221,21 @@ getIntegerPairAttribute(const Function &F, StringRef Name,
return Ints;
}
+unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name, unsigned Default) {
+ Attribute A = F.getFnAttribute(Name);
+ if (!A.isStringAttribute())
+ return Default;
+
+ LLVMContext &Ctx = F.getContext();
+ unsigned IntVal = Default;
+ StringRef Str = A.getValueAsString();
+ if (Str.trim().getAsInteger(0, IntVal)) {
+ Ctx.emitError("can't parse integer attribute " + Name);
+ return Default;
+ }
+ return IntVal;
+}
+
unsigned getVmcntBitMask(const IsaVersion &Version) {
return (1 << (getVmcntBitWidthLo(Version.Major) +
getVmcntBitWidthHi(Version.Major))) -
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 3c9f330cbcded9..c54c1638fa97a1 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -818,6 +818,14 @@ bool shouldEmitConstantsToTextSection(const Triple &TT);
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);
+/// \returns Unsigned Integer value requested using \p F's \p Name attribute.
+///
+/// \returns \p Default if attribute is not present.
+///
+/// \returns \p Default and emits error if requested value cannot be converted
+/// to integer.
+unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name, unsigned Default);
+
/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll
new file mode 100644
index 00000000000000..315cd7dc0c0d91
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll
@@ -0,0 +1,82 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+
+; Attribute not specified.
+; CHECK-LABEL: {{^}}empty_no_attribute:
+define amdgpu_kernel void @empty_no_attribute() {
+entry:
+ ret void
+}
+
+; Ignore if number of work groups is 0.
+; CHECK-LABEL: {{^}}empty_num_work_groups_0:
+define amdgpu_kernel void @empty_num_work_groups_0() #0 {
+entry:
+ ret void
+}
+attributes #0 = {"amdgpu-num-work-groups"="0"}
+
+; Exactly 1 work group.
+; CHECK-LABEL: {{^}}empty_num_work_groups_1:
+define amdgpu_kernel void @empty_num_work_groups_1() #1 {
+entry:
+ ret void
+}
+attributes #1 = {"amdgpu-num-work-groups"="1"}
+
+; Exactly 5 work groups.
+; CHECK-LABEL: {{^}}empty_num_work_groups_5:
+define amdgpu_kernel void @empty_num_work_groups_5() #2 {
+entry:
+ ret void
+}
+attributes #2 = {"amdgpu-num-work-groups"="5"}
+
+; Exactly 32 work groups.
+; CHECK-LABEL: {{^}}empty_num_work_groups_32:
+define amdgpu_kernel void @empty_num_work_groups_32() #3 {
+entry:
+ ret void
+}
+attributes #3 = {"amdgpu-num-work-groups"="32"}
+
+; Exactly 50 work groups.
+; CHECK-LABEL: {{^}}empty_num_work_groups_50:
+define amdgpu_kernel void @empty_num_work_groups_50() #4 {
+entry:
+ ret void
+}
+attributes #4 = {"amdgpu-num-work-groups"="50"}
+
+; Exactly 256 work groups.
+; CHECK-LABEL: {{^}}empty_num_work_groups_256:
+define amdgpu_kernel void @empty_num_work_groups_256() #5 {
+entry:
+ ret void
+}
+attributes #5 = {"amdgpu-num-work-groups"="256"}
+
+; Exactly 1024 work groups.
+; CHECK-LABEL: {{^}}empty_num_work_groups_1024:
+define amdgpu_kernel void @empty_num_work_groups_1024() #6 {
+entry:
+ ret void
+}
+attributes #6 = {"amdgpu-num-work-groups"="1024"}
+
+; CHECK: .amdgpu_metadata
+; CHECK: .name: empty_no_attribute
+; CHECK-NEXT: .private_segment_fixed_size: 0
+; CHECK: .name: empty_num_work_groups_0
+; CHECK-NEXT: .private_segment_fixed_size: 0
+; CHECK: .name: empty_num_work_groups_1
+; CHECK-NEXT: .num_work_groups: 1
+; CHECK: .name: empty_num_work_groups_5
+; CHECK-NEXT: .num_work_groups: 5
+; CHECK: .name: empty_num_work_groups_32
+; CHECK-NEXT: .num_work_groups: 32
+; CHECK: .name: empty_num_work_groups_50
+; CHECK-NEXT: .num_work_groups: 50
+; CHECK: .name: empty_num_work_groups_256
+; CHECK-NEXT: .num_work_groups: 256
+; CHECK: .name: empty_num_work_groups_1024
+; CHECK-NEXT: .num_work_groups: 1024
``````````
</details>
https://github.com/llvm/llvm-project/pull/75647
More information about the cfe-commits
mailing list