[llvm] [NVPTX] Add family-specific architectures support (PR #141899)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 23:20:06 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Rajat Bajpai (rajatbajpai)
<details>
<summary>Changes</summary>
This change adds support for the family-specific architectures introduced in [PTX ISA 8.8](https://docs.nvidia.com/cuda/parallel-thread-execution/#ptx-isa-version-8-8). These architectures have an "f" suffix, for example sm_100f.
This change doesn't promote existing features to the family-specific architectures.
---
Full diff: https://github.com/llvm/llvm-project/pull/141899.diff
2 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTX.td (+13-6)
- (modified) llvm/lib/Target/NVPTX/NVPTXSubtarget.h (+23-4)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index ff9a187ecf723..3ed2553fa4232 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -41,12 +41,14 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
// Arch-specific targets. PTX for these is not compatible with any other
// architectures.
-def SM90a : FeatureSM<"90a", 901>;
-def SM100a: FeatureSM<"100a", 1001>;
-def SM101a: FeatureSM<"101a", 1011>;
-def SM103a: FeatureSM<"103a", 1031>;
-def SM120a: FeatureSM<"120a", 1201>;
-def SM121a: FeatureSM<"121a", 1211>;
+foreach sm = [90, 100, 101, 103, 120, 121] in {
+ def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 10), 1)>;
+}
+
+// Family-specific targets. PTX for these is compatible within the same family.
+foreach sm = [100, 101, 103, 120, 121] in {
+ def SM#sm#f : FeatureSM<""#sm#"f", !add(!mul(sm, 10), 2)>;
+}
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
70, 71, 72, 73, 74, 75, 76, 77, 78,
@@ -83,14 +85,19 @@ def : Proc<"sm_90", [SM90, PTX78]>;
def : Proc<"sm_90a", [SM90a, PTX80]>;
def : Proc<"sm_100", [SM100, PTX86]>;
def : Proc<"sm_100a", [SM100a, PTX86]>;
+def : Proc<"sm_100f", [SM100f, PTX88]>;
def : Proc<"sm_101", [SM101, PTX86]>;
def : Proc<"sm_101a", [SM101a, PTX86]>;
+def : Proc<"sm_101f", [SM101f, PTX88]>;
def : Proc<"sm_103", [SM103, PTX88]>;
def : Proc<"sm_103a", [SM103a, PTX88]>;
+def : Proc<"sm_103f", [SM103f, PTX88]>;
def : Proc<"sm_120", [SM120, PTX87]>;
def : Proc<"sm_120a", [SM120a, PTX87]>;
+def : Proc<"sm_120f", [SM120f, PTX88]>;
def : Proc<"sm_121", [SM121, PTX88]>;
def : Proc<"sm_121a", [SM121a, PTX88]>;
+def : Proc<"sm_121f", [SM121f, PTX88]>;
def NVPTXInstrInfo : InstrInfo {
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 5136b1ee28502..5e4ab9476cb31 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -132,10 +132,29 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// are supported on the specified architecture only, hence such targets do not
// follow the onion layer model. hasArchAccelFeatures() allows
// distinguishing such GPU variants from the base GPU architecture.
- // - 0 represents base GPU model,
- // - non-zero value identifies particular architecture-accelerated variant.
- bool hasArchAccelFeatures() const { return getFullSmVersion() % 10; }
-
+ // - false represents non-accelerated architecture.
+ // - true represents architecture-accelerated variant.
+ bool hasArchAccelFeatures() const {
+ auto FullSMVersionMod = getFullSmVersion() % 10;
+ assert(FullSMVersionMod < 3 && "Invalid architecture!");
+ return FullSMVersionMod == 1;
+ }
+ // GPUs with 'f' suffix have architecture-accelerated features which are
+ // portable across all future architectures under same SM major. For example,
+ // sm_100f features will work for sm_10X future architectures.
+ // - false represents non-family-specific architecture.
+ // - true represents family-specific variant.
+ bool hasFamilySpecificFeatures() const {
+ auto FullSMVersionMod = getFullSmVersion() % 10;
+ assert(FullSMVersionMod < 3 && "Invalid architecture!");
+ return FullSMVersionMod == 2 && PTXVersion >= 88;
+ }
+ // Checks if architecture is accelerated or family-specific.
+ // - false represents neither arch-accelerated nor family-specific arch.
+ // - true represents either arch-accelerated or family-specific arch.
+ bool hasArchAccelOrFamilySpecificFeatures() const {
+ return hasArchAccelFeatures() || hasFamilySpecificFeatures();
+ }
// If the user did not provide a target we default to the `sm_30` target.
std::string getTargetName() const {
return TargetName.empty() ? "sm_30" : TargetName;
``````````
</details>
https://github.com/llvm/llvm-project/pull/141899
More information about the llvm-commits
mailing list