[clang] [llvm] Fix nvptx range check (PR #136296)

Fri Apr 18 04:54:51 PDT 2025

https://github.com/jurahul created https://github.com/llvm/llvm-project/pull/136296

None

>From 8d0178850b74c568c03e98de47dbc9a94adedd05 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 17 Apr 2025 15:59:56 -0700
Subject: [PATCH 1/2] [NFC][LLVM][TableGen] Use `decodeULEB128` for
 `OPC_SoftFail` emission

- Use `decodeULEB128` to decode +ve/-ve mask in OPC_SoftFail case.
- Use current I/E iterators as inputs to `decodeULEB128`.
---
 llvm/utils/TableGen/DecoderEmitter.cpp | 50 ++++++++++----------------
 1 file changed, 18 insertions(+), 32 deletions(-)

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 75c8c80aebd6d..a3fe49ef25de7 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -849,8 +849,7 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
 
       // ULEB128 encoded start value.
       const char *ErrMsg = nullptr;
-      unsigned Start = decodeULEB128(Table.data() + Pos + 1, nullptr,
-                                     Table.data() + Table.size(), &ErrMsg);
+      unsigned Start = decodeULEB128(&*I, nullptr, &*E, &ErrMsg);
       assert(ErrMsg == nullptr && "ULEB128 value too large!");
       emitULEB128(I, OS);
 
@@ -904,8 +903,7 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
       ++I;
       // Decode the Opcode value.
       const char *ErrMsg = nullptr;
-      unsigned Opc = decodeULEB128(Table.data() + Pos + 1, nullptr,
-                                   Table.data() + Table.size(), &ErrMsg);
+      unsigned Opc = decodeULEB128(&*I, nullptr, &*E, &ErrMsg);
       assert(ErrMsg == nullptr && "ULEB128 value too large!");
 
       OS << Indent << "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode, ";
@@ -934,34 +932,22 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
     }
     case MCD::OPC_SoftFail: {
       ++I;
-      OS << Indent << "MCD::OPC_SoftFail";
-      // Positive mask
-      uint64_t Value = 0;
-      unsigned Shift = 0;
-      do {
-        OS << ", " << (unsigned)*I;
-        Value += ((uint64_t)(*I & 0x7f)) << Shift;
-        Shift += 7;
-      } while (*I++ >= 128);
-      if (Value > 127) {
-        OS << " /* 0x";
-        OS.write_hex(Value);
-        OS << " */";
-      }
-      // Negative mask
-      Value = 0;
-      Shift = 0;
-      do {
-        OS << ", " << (unsigned)*I;
-        Value += ((uint64_t)(*I & 0x7f)) << Shift;
-        Shift += 7;
-      } while (*I++ >= 128);
-      if (Value > 127) {
-        OS << " /* 0x";
-        OS.write_hex(Value);
-        OS << " */";
-      }
-      OS << ",\n";
+      OS << Indent << "MCD::OPC_SoftFail, ";
+      // Decode the positive mask.
+      const char *ErrMsg = nullptr;
+      uint64_t PositiveMask = decodeULEB128(&*I, nullptr, &*E, &ErrMsg);
+      assert(ErrMsg == nullptr && "ULEB128 value too large!");
+      emitULEB128(I, OS);
+
+      // Decode the negative mask.
+      uint64_t NegativeMask = decodeULEB128(&*I, nullptr, &*E, &ErrMsg);
+      assert(ErrMsg == nullptr && "ULEB128 value too large!");
+      emitULEB128(I, OS);
+      OS << "// +ve mask: 0x";
+      OS.write_hex(PositiveMask);
+      OS << ", -ve mask: 0x";
+      OS.write_hex(NegativeMask);
+      OS << '\n';
       break;
     }
     case MCD::OPC_Fail: {

>From 6d025c7114c7aaac9ba0c00f2f89bb8ce6aed76c Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Fri, 18 Apr 2025 04:52:05 -0700
Subject: [PATCH 2/2] [Clang][GPU] Fix test to not have range on NVPTX tid.x

- llvm.nvvm.read.ptx.sreg.tid.x intrinsics does not have the
  output range attribute yet.
---
 clang/test/Headers/gpuintrin_lang.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Headers/gpuintrin_lang.c b/clang/test/Headers/gpuintrin_lang.c
index ab660ac5c8a49..b804d46071507 100644
--- a/clang/test/Headers/gpuintrin_lang.c
+++ b/clang/test/Headers/gpuintrin_lang.c
@@ -36,7 +36,7 @@ __device__ int foo() { return __gpu_thread_id_x(); }
 // CUDA-LABEL: define dso_local i32 @foo(
 // CUDA-SAME: ) #[[ATTR0:[0-9]+]] {
 // CUDA-NEXT:  [[ENTRY:.*:]]
-// CUDA-NEXT:    [[TMP0:%.*]] = call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+// CUDA-NEXT:    [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 // CUDA-NEXT:    ret i32 [[TMP0]]
 //
 // HIP-LABEL: define dso_local i32 @foo(