[llvm] d677505 - [NVPTX] Set default version of architecture to SM_30, PTX to 6.0.

Tue Jan 10 04:23:11 PST 2023

Author: Pavel Kopyl
Date: 2023-01-10T15:22:40+03:00
New Revision: d6775052de6b7aaeaef17fdac1c796c2b99a91ee

URL: https://github.com/llvm/llvm-project/commit/d6775052de6b7aaeaef17fdac1c796c2b99a91ee
DIFF: https://github.com/llvm/llvm-project/commit/d6775052de6b7aaeaef17fdac1c796c2b99a91ee.diff

LOG: [NVPTX] Set default version of architecture to SM_30, PTX to 6.0.

Support of variadic functions triggers an assertion on several tests
from llvm/test/CodeGen/Generic/ if nvptx64-* is specified as a default
triplet:

Support for variadic functions (unsized array parameter) introduced in
PTX ISA version 6.0 and requires target sm_30.

That happens because those tests contain variadic function calls and
default versions of both PTX ISA (3.2) and architecture (sm_20) are
below the minimally required.

There were no observable problems with these tests before adding
support of variadic functions, because nvptx backend just didn't
handle them properly generating invalid PTX code.

Differential Revision: https://reviews.llvm.org/D141054

Added: 
    

Modified: 
    llvm/lib/Target/NVPTX/NVPTX.td
    llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
    llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
    llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
    llvm/test/CodeGen/NVPTX/sm-version.ll
    llvm/test/CodeGen/NVPTX/surf-tex.py
    llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index dcdd1286df382..4d4203c503768 100644

--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -115,11 +115,11 @@ def PTX78 : SubtargetFeature<"ptx78", "PTXVersion", "78",
 class Proc<string Name, list<SubtargetFeature> Features>
  : Processor<Name, NoItineraries, Features>;
 
-def : Proc<"sm_20", [SM20]>;
-def : Proc<"sm_21", [SM21]>;
+def : Proc<"sm_20", [SM20, PTX32]>;
+def : Proc<"sm_21", [SM21, PTX32]>;
 def : Proc<"sm_30", [SM30]>;
 def : Proc<"sm_32", [SM32, PTX40]>;
-def : Proc<"sm_35", [SM35]>;
+def : Proc<"sm_35", [SM35, PTX32]>;
 def : Proc<"sm_37", [SM37, PTX41]>;
 def : Proc<"sm_50", [SM50, PTX40]>;
 def : Proc<"sm_52", [SM52, PTX41]>;

diff  --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index a03492a92bacb..2347f46449d5f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -33,13 +33,13 @@ void NVPTXSubtarget::anchor() {}
 NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                                 StringRef FS) {
     // Provide the default CPU if we don't have one.
-    TargetName = std::string(CPU.empty() ? "sm_20" : CPU);
+    TargetName = std::string(CPU.empty() ? "sm_30" : CPU);
 
     ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
 
-    // Set default to PTX 3.2 (CUDA 5.5)
+    // Set default to PTX 6.0 (CUDA 9.0)
     if (PTXVersion == 0) {
-      PTXVersion = 32;
+      PTXVersion = 60;
   }
 
   return *this;

diff  --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
index c4be3b0ab8a32..e8c554c9ed528 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
@@ -1,7 +1,7 @@
 ; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
 ; Verify that __nvvm_reflect() is replaced with an appropriate value.
 ;
-; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 \
+; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
 ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM20
 ; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
 ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM35

diff  --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
index 7a5a5a7c266e0..2c9ea4742de84 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
@@ -1,6 +1,6 @@
 ; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
 ;
-; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 \
+; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
 ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM20
 ; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
 ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM35

diff  --git a/llvm/test/CodeGen/NVPTX/sm-version.ll b/llvm/test/CodeGen/NVPTX/sm-version.ll
index 7cb56a81bab9d..eacd7f38e1269 100644
--- a/llvm/test/CodeGen/NVPTX/sm-version.ll
+++ b/llvm/test/CodeGen/NVPTX/sm-version.ll
@@ -32,7 +32,9 @@
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefix=SM80
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_86 | FileCheck %s --check-prefix=SM86
 
-; SM30: .version 3.2
+; SM20: .version 3.2
+; SM21: .version 3.2
+; SM30: .version 6.0
 ; SM32: .version 4.0
 ; SM35: .version 3.2
 ; SM37: .version 4.1

diff  --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py
index c0b40f1d927bb..4e239ae7b4f48 100644
--- a/llvm/test/CodeGen/NVPTX/surf-tex.py
+++ b/llvm/test/CodeGen/NVPTX/surf-tex.py
@@ -1,6 +1,6 @@
 # RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
-# RUN: llc %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
-# RUN: %if ptxas %{ llc %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
+# RUN: llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
+# RUN: %if ptxas %{ llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
 
 # We only need to run this second time for texture tests, because
 # there is a 
diff erence between unified and non-unified intrinsics.

diff  --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
index b5c13da083f44..65feae836ea7a 100644
--- a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
+++ b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-nvidia-cuda | %ptxas-verify %}
 
-; CHECK: .target sm_20, debug
+; CHECK: .target sm_30, debug
 
 ; CHECK: .visible .func use_dbg_declare()
 ; CHECK: .local .align 8 .b8 __local_depot0[8];