[llvm] [NVPTX] Add family-specific architectures support (PR #141899)
Rajat Bajpai via llvm-commits
llvm-commits at lists.llvm.org
Sat May 31 02:42:48 PDT 2025
https://github.com/rajatbajpai updated https://github.com/llvm/llvm-project/pull/141899
>From 25e69b66efb4608cc9ab3df62a2235e458d3afa8 Mon Sep 17 00:00:00 2001
From: rbajpai <rbajpai at nvidia.com>
Date: Wed, 28 May 2025 16:48:44 +0530
Subject: [PATCH 1/4] [NVPTX] Add family-specific architectures support
This change adds support for family-specific architectures. These
architectures have an "f" suffix, for example sm_100f.
This change doesn't promote any existing features to the family-specific
architectures.
---
llvm/lib/Target/NVPTX/NVPTX.td | 19 ++++++++++++------
llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 27 ++++++++++++++++++++++----
2 files changed, 36 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index ff9a187ecf723..3ed2553fa4232 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -41,12 +41,14 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
// Arch-specific targets. PTX for these is not compatible with any other
// architectures.
-def SM90a : FeatureSM<"90a", 901>;
-def SM100a: FeatureSM<"100a", 1001>;
-def SM101a: FeatureSM<"101a", 1011>;
-def SM103a: FeatureSM<"103a", 1031>;
-def SM120a: FeatureSM<"120a", 1201>;
-def SM121a: FeatureSM<"121a", 1211>;
+foreach sm = [90, 100, 101, 103, 120, 121] in {
+ def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 10), 1)>;
+}
+
+// Family-specific targets. PTX for these is compatible within the same family.
+foreach sm = [100, 101, 103, 120, 121] in {
+ def SM#sm#f : FeatureSM<""#sm#"f", !add(!mul(sm, 10), 2)>;
+}
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
70, 71, 72, 73, 74, 75, 76, 77, 78,
@@ -83,14 +85,19 @@ def : Proc<"sm_90", [SM90, PTX78]>;
def : Proc<"sm_90a", [SM90a, PTX80]>;
def : Proc<"sm_100", [SM100, PTX86]>;
def : Proc<"sm_100a", [SM100a, PTX86]>;
+def : Proc<"sm_100f", [SM100f, PTX88]>;
def : Proc<"sm_101", [SM101, PTX86]>;
def : Proc<"sm_101a", [SM101a, PTX86]>;
+def : Proc<"sm_101f", [SM101f, PTX88]>;
def : Proc<"sm_103", [SM103, PTX88]>;
def : Proc<"sm_103a", [SM103a, PTX88]>;
+def : Proc<"sm_103f", [SM103f, PTX88]>;
def : Proc<"sm_120", [SM120, PTX87]>;
def : Proc<"sm_120a", [SM120a, PTX87]>;
+def : Proc<"sm_120f", [SM120f, PTX88]>;
def : Proc<"sm_121", [SM121, PTX88]>;
def : Proc<"sm_121a", [SM121a, PTX88]>;
+def : Proc<"sm_121f", [SM121f, PTX88]>;
def NVPTXInstrInfo : InstrInfo {
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 5136b1ee28502..5e4ab9476cb31 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -132,10 +132,29 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// are supported on the specified architecture only, hence such targets do not
// follow the onion layer model. hasArchAccelFeatures() allows
// distinguishing such GPU variants from the base GPU architecture.
- // - 0 represents base GPU model,
- // - non-zero value identifies particular architecture-accelerated variant.
- bool hasArchAccelFeatures() const { return getFullSmVersion() % 10; }
-
+ // - false represents non-accelerated architecture.
+ // - true represents architecture-accelerated variant.
+ bool hasArchAccelFeatures() const {
+ auto FullSMVersionMod = getFullSmVersion() % 10;
+ assert(FullSMVersionMod < 3 && "Invalid architecture!");
+ return FullSMVersionMod == 1;
+ }
+ // GPUs with 'f' suffix have architecture-accelerated features which are
+ // portable across all future architectures under same SM major. For example,
+ // sm_100f features will work for sm_10X future architectures.
+ // - false represents non-family-specific architecture.
+ // - true represents family-specific variant.
+ bool hasFamilySpecificFeatures() const {
+ auto FullSMVersionMod = getFullSmVersion() % 10;
+ assert(FullSMVersionMod < 3 && "Invalid architecture!");
+ return FullSMVersionMod == 2 && PTXVersion >= 88;
+ }
+ // Checks if architecture is accelerated or family-specific.
+ // - false represents neither arch-accelerated nor family-specific arch.
+ // - true represents either arch-accelerated or family-specific arch.
+ bool hasArchAccelOrFamilySpecificFeatures() const {
+ return hasArchAccelFeatures() || hasFamilySpecificFeatures();
+ }
// If the user did not provide a target we default to the `sm_30` target.
std::string getTargetName() const {
return TargetName.empty() ? "sm_30" : TargetName;
>From 203c577aa185fb2de43590826a9373d421812f16 Mon Sep 17 00:00:00 2001
From: rbajpai <rbajpai at nvidia.com>
Date: Thu, 29 May 2025 17:18:48 +0530
Subject: [PATCH 2/4] Addressed review comments.
---
llvm/lib/Target/NVPTX/NVPTX.td | 17 +++++++++--------
llvm/test/CodeGen/NVPTX/sm-version.ll | 20 ++++++++++++++++++++
2 files changed, 29 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index 3ed2553fa4232..4d5a36e84a2d7 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -36,18 +36,19 @@ class FeaturePTX<int version>:
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
60, 61, 62, 70, 72, 75, 80, 86, 87,
- 89, 90, 100, 101, 103, 120, 121] in
+ 89, 90] in
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
-// Arch-specific targets. PTX for these is not compatible with any other
-// architectures.
-foreach sm = [90, 100, 101, 103, 120, 121] in {
- def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 10), 1)>;
-}
+// Full SM version for sm_90a is 901
+def SM90a: FeatureSM<"90a", 901>;
-// Family-specific targets. PTX for these is compatible within the same family.
foreach sm = [100, 101, 103, 120, 121] in {
- def SM#sm#f : FeatureSM<""#sm#"f", !add(!mul(sm, 10), 2)>;
+ def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
+ // Arch-specific targets. PTX for these is not compatible with any other
+ // architectures.
+ def SM#sm#a: FeatureSM<""#sm#"a", !add(!mul(sm, 10), 1)>;
+ // Family-specific targets. PTX for these is compatible within the same family.
+ def SM#sm#f: FeatureSM<""#sm#"f", !add(!mul(sm, 10), 2)>;
}
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
diff --git a/llvm/test/CodeGen/NVPTX/sm-version.ll b/llvm/test/CodeGen/NVPTX/sm-version.ll
index 9705a2f3ba730..3a154a1b9ac9c 100644
--- a/llvm/test/CodeGen/NVPTX/sm-version.ll
+++ b/llvm/test/CodeGen/NVPTX/sm-version.ll
@@ -18,14 +18,19 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_90a | FileCheck %s --check-prefix=SM90a
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100 | FileCheck %s --check-prefix=SM100
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a
+; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100f | FileCheck %s --check-prefix=SM100f
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101 | FileCheck %s --check-prefix=SM101
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a
+; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101f | FileCheck %s --check-prefix=SM101f
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103 | FileCheck %s --check-prefix=SM103
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a
+; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103f | FileCheck %s --check-prefix=SM103f
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120 | FileCheck %s --check-prefix=SM120
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a
+; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120f | FileCheck %s --check-prefix=SM120f
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121 | FileCheck %s --check-prefix=SM121
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a
+; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121f | FileCheck %s --check-prefix=SM121f
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_21 | FileCheck %s --check-prefix=SM21
@@ -47,14 +52,19 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90a | FileCheck %s --check-prefix=SM90a
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 | FileCheck %s --check-prefix=SM100
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f | FileCheck %s --check-prefix=SM100f
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101 | FileCheck %s --check-prefix=SM101
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101f | FileCheck %s --check-prefix=SM101f
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103 | FileCheck %s --check-prefix=SM103
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103f | FileCheck %s --check-prefix=SM103f
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120 | FileCheck %s --check-prefix=SM120
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120f | FileCheck %s --check-prefix=SM120f
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121 | FileCheck %s --check-prefix=SM121
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121f | FileCheck %s --check-prefix=SM121f
; SM20: .version 3.2
; SM21: .version 3.2
@@ -76,14 +86,19 @@
; SM90a: .version 8.0
; SM100: .version 8.6
; SM100a: .version 8.6
+; SM100f: .version 8.8
; SM101: .version 8.6
; SM101a: .version 8.6
+; SM101f: .version 8.8
; SM103: .version 8.8
; SM103a: .version 8.8
+; SM103f: .version 8.8
; SM120: .version 8.7
; SM120a: .version 8.7
+; SM120f: .version 8.8
; SM121: .version 8.8
; SM121a: .version 8.8
+; SM121f: .version 8.8
; SM20: .target sm_20
; SM21: .target sm_21
@@ -105,11 +120,16 @@
; SM90a: .target sm_90a
; SM100: .target sm_100
; SM100a: .target sm_100a
+; SM100f: .target sm_100f
; SM101: .target sm_101
; SM101a: .target sm_101a
+; SM101f: .target sm_101f
; SM103: .target sm_103
; SM103a: .target sm_103a
+; SM103f: .target sm_103f
; SM120: .target sm_120
; SM120a: .target sm_120a
+; SM120f: .target sm_120f
; SM121: .target sm_121
; SM121a: .target sm_121a
+; SM121f: .target sm_121f
>From e2837d17e90621e655b3e12d1c46d46cc4d41eb6 Mon Sep 17 00:00:00 2001
From: rbajpai <rbajpai at nvidia.com>
Date: Fri, 30 May 2025 15:42:52 +0530
Subject: [PATCH 3/4] Changed Full SM version according to the review comments.
---
llvm/lib/Target/NVPTX/NVPTX.td | 33 +++++++++++++++----------
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 8 +++---
llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 22 ++++-------------
3 files changed, 29 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index 4d5a36e84a2d7..b5c20e24bb700 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -36,19 +36,26 @@ class FeaturePTX<int version>:
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
60, 61, 62, 70, 72, 75, 80, 86, 87,
- 89, 90] in
- def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
-
-// Full SM version for sm_90a is 901
-def SM90a: FeatureSM<"90a", 901>;
-
-foreach sm = [100, 101, 103, 120, 121] in {
- def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
- // Arch-specific targets. PTX for these is not compatible with any other
- // architectures.
- def SM#sm#a: FeatureSM<""#sm#"a", !add(!mul(sm, 10), 1)>;
- // Family-specific targets. PTX for these is compatible within the same family.
- def SM#sm#f: FeatureSM<""#sm#"f", !add(!mul(sm, 10), 2)>;
+ 89, 90, 100, 101, 103, 120, 121] in {
+ // Base SM version (e.g. FullSMVersion for sm_100 is 10000)
+ def SM#sm : FeatureSM<""#sm, !mul(sm, 100)>;
+
+ // Note: Subset of the architecture-specific features, normally
+ // available in "a" variants that will be compatible with subsequent targets
+ // in the same family. I.e they are only ordered within the major architecture,
+ // but are not comparable with other major architectures
+
+ // Family-specific targets which are compatible within same family
+ // (e.g. FullSMVersion for sm_100f is 10010)
+ if !ge(sm, 100) then {
+ def SM#sm#f : FeatureSM<""#sm#"f", !add(!mul(sm, 100), 10)>;
+ }
+
+ // Architecture-specific targets which are incompatible across architectures
+ // (e.g. FullSMVersion for sm_100a is 10011)
+ if !ge(sm, 90) then {
+ def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 100), 11)>;
+ }
}
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 98027c5aa9c22..ec72a7790f75e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -166,10 +166,10 @@ class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>
class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;
// Explicit records for arch-accelerated SM versions
-def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">;
-def hasSM100a : Predicate<"Subtarget->getFullSmVersion() == 1001">;
-def hasSM101a : Predicate<"Subtarget->getFullSmVersion() == 1011">;
-def hasSM120a : Predicate<"Subtarget->getFullSmVersion() == 1201">;
+def hasSM90a : Predicate<"Subtarget->getSmVersion() == 90 && Subtarget->hasArchAccelFeatures()">;
+def hasSM100a : Predicate<"Subtarget->getSmVersion() == 100 && Subtarget->hasArchAccelFeatures()">;
+def hasSM101a : Predicate<"Subtarget->getSmVersion() == 101 && Subtarget->hasArchAccelFeatures()">;
+def hasSM120a : Predicate<"Subtarget->getSmVersion() == 120 && Subtarget->hasArchAccelFeatures()">;
// non-sync shfl instructions are not available on sm_70+ in PTX6.4+
def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 5e4ab9476cb31..825fd0677c001 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -108,8 +108,8 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
switch (FullSmVersion) {
default:
break;
- case 1001: // sm_100a
- case 1011: // sm_101a
+ case 10011: // sm_100a
+ case 10111: // sm_101a
HasTcgen05 = true;
break;
}
@@ -127,33 +127,21 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
- unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
+ unsigned int getSmVersion() const { return getFullSmVersion() / 100; }
// GPUs with "a" suffix have include architecture-accelerated features that
// are supported on the specified architecture only, hence such targets do not
// follow the onion layer model. hasArchAccelFeatures() allows
// distinguishing such GPU variants from the base GPU architecture.
// - false represents non-accelerated architecture.
// - true represents architecture-accelerated variant.
- bool hasArchAccelFeatures() const {
- auto FullSMVersionMod = getFullSmVersion() % 10;
- assert(FullSMVersionMod < 3 && "Invalid architecture!");
- return FullSMVersionMod == 1;
- }
+ bool hasArchAccelFeatures() const { return getFullSmVersion() % 10; }
// GPUs with 'f' suffix have architecture-accelerated features which are
// portable across all future architectures under same SM major. For example,
// sm_100f features will work for sm_10X future architectures.
// - false represents non-family-specific architecture.
// - true represents family-specific variant.
bool hasFamilySpecificFeatures() const {
- auto FullSMVersionMod = getFullSmVersion() % 10;
- assert(FullSMVersionMod < 3 && "Invalid architecture!");
- return FullSMVersionMod == 2 && PTXVersion >= 88;
- }
- // Checks if architecture is accelerated or family-specific.
- // - false represents neither arch-accelerated nor family-specific arch.
- // - true represents either arch-accelerated or family-specific arch.
- bool hasArchAccelOrFamilySpecificFeatures() const {
- return hasArchAccelFeatures() || hasFamilySpecificFeatures();
+ return getFullSmVersion() % 100 != 0 && PTXVersion >= 88;
}
// If the user did not provide a target we default to the `sm_30` target.
std::string getTargetName() const {
>From e5425762ec4d03a402971bca2fb4cc0b41be4ba3 Mon Sep 17 00:00:00 2001
From: rbajpai <rbajpai at nvidia.com>
Date: Sat, 31 May 2025 15:11:36 +0530
Subject: [PATCH 4/4] Added NVPTX architecture description
---
llvm/lib/Target/NVPTX/NVPTX.td | 43 ++++++++++++++++++++++----
llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 2 +-
2 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index b5c20e24bb700..84d45cde189fc 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -33,18 +33,49 @@ class FeaturePTX<int version>:
SubtargetFeature<"ptx"# version, "PTXVersion",
"" # version,
"Use PTX version " # version>;
-
+//
+// NVPTX Architecture Hierarchy and Ordering:
+//
+// Family: 2/3/5/6/7/8/9/10/12 (Follows Onion model, older family is compatible with newer family)
+// Arch: 2*/3*/5*/6*/7*/8*/9*/10*/12*
+//
+// Family-specific: F*f : F*f > F* =>
+// + The plain base architecture is compatible with the family-specific architecture
+// (e.g. sm_100 compatible with >= sm_100*f*)
+// + The family-specific architecture is compatible with future family-specific
+// architectures within the same family (e.g. sm_100f compatible with >= sm_10X*f*
+// but not with sm_12X*f*)
+//
+// Family and SM Target Definition:
+// +----------------+--------------------------------------------------------+
+// | Family | Target SM architectures included |
+// +----------------+--------------------------------------------------------+
+// | sm_10x family | sm_100f, sm_103f, future targets in sm_10x family |
+// | sm_101 family | sm_101f (exception) |
+// | sm_12x family | sm_120f, sm_121f, future targets in sm_12x family |
+// +----------------+--------------------------------------------------------+
+//
+// Architecture-specific: F*a : F*a > F*f > F* =>
+// + The plain base architecture is compatible with the architecture-specific architecture
+// (e.g. sm_100 compatible with >= sm_100*a*)
+// + The family-specific architecture is compatible with the architecture-specific architecture
+// (e.g. sm_100f compatible with >= sm_100*a*)
+// + The architecture-specific architecture is incompatible with any other architecture
+// (e.g. sm_100a is only compatible with sm_100*a*)
+//
+// Encoding: Arch * 100 + 'f' * 10 + 'a' * 1 (where 'a' ⇒ 'f'), e.g. sm_100a is 100 * 100 + 10 + 1 = 10011
+//
+// This encoding allows simple implementation of the partial ordering of the architectures.
+// + Compare Family and Arch by dividing FullSMVersion by 1000 and 100 respectively before the comparison.
+// + Compare within the family by comparing FullSMVersion, given that both belong to the same family.
+// + Detect 'a' variants by checking FullSMVersion % 10.
+//
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
60, 61, 62, 70, 72, 75, 80, 86, 87,
89, 90, 100, 101, 103, 120, 121] in {
// Base SM version (e.g. FullSMVersion for sm_100 is 10000)
def SM#sm : FeatureSM<""#sm, !mul(sm, 100)>;
- // Note: Subset of the architecture-specific features, normally
- // available in "a" variants that will be compatible with subsequent targets
- // in the same family. I.e they are only ordered within the major architecture,
- // but are not comparable with other major architectures
-
// Family-specific targets which are compatible within same family
// (e.g. FullSMVersion for sm_100f is 10010)
if !ge(sm, 100) then {
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 825fd0677c001..6be6d81ca103d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -137,7 +137,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasArchAccelFeatures() const { return getFullSmVersion() % 10; }
// GPUs with 'f' suffix have architecture-accelerated features which are
// portable across all future architectures under same SM major. For example,
- // sm_100f features will work for sm_10X future architectures.
+ // sm_100f features will work for sm_10X*f*/sm_10X*a* future architectures.
// - false represents non-family-specific architecture.
// - true represents family-specific variant.
bool hasFamilySpecificFeatures() const {
More information about the llvm-commits
mailing list