[Mlir-commits] [mlir] [mlir][spirv] Truncate Literal String size at max number words (PR #142916)

Davide Grohmann llvmlistbot at llvm.org
Tue Jun 10 00:18:05 PDT 2025


https://github.com/davidegrohmann updated https://github.com/llvm/llvm-project/pull/142916

>From 54172aac4f2be65ec10fd484b47d9600c208d9e3 Mon Sep 17 00:00:00 2001
From: Davide Grohmann <davide.grohmann at arm.com>
Date: Wed, 29 Jan 2025 12:56:47 +0100
Subject: [PATCH 1/3] [mlir][spirv] Truncate Literal String size at max number
 words

If the literal is not truncated, SPIR-V serialization does not fail but
instead produces an invalid SPIR-V module.

Change-Id: I54c4e54d6ad081861b524d4ae236a1e5080b88c4
Signed-off-by: Davide Grohmann <davide.grohmann at arm.com>
---
 .../include/mlir/Target/SPIRV/SPIRVBinaryUtils.h |  6 ++++++
 mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp       | 16 +++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Target/SPIRV/SPIRVBinaryUtils.h b/mlir/include/mlir/Target/SPIRV/SPIRVBinaryUtils.h
index e46a576f1d48e..d3847ae3d3bb2 100644
--- a/mlir/include/mlir/Target/SPIRV/SPIRVBinaryUtils.h
+++ b/mlir/include/mlir/Target/SPIRV/SPIRVBinaryUtils.h
@@ -30,6 +30,12 @@ constexpr uint32_t kMagicNumber = 0x07230203;
 /// The serializer tool ID registered to the Khronos Group
 constexpr uint32_t kGeneratorNumber = 22;
 
+// Max number of words
+constexpr uint32_t kMaxWordCount = 65535;
+
+// Max number of words for literal
+constexpr uint32_t kMaxLiteralWordCount = kMaxWordCount - 3;
+
 /// Appends a SPRI-V module header to `header` with the given `version` and
 /// `idBound`.
 void appendModuleHeader(SmallVectorImpl<uint32_t> &header,
diff --git a/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp b/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp
index 31205d8f408f1..4d4d67a012ae1 100644
--- a/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp
+++ b/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp
@@ -13,6 +13,9 @@
 #include "mlir/Target/SPIRV/SPIRVBinaryUtils.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVTypes.h"
 #include "llvm/Config/llvm-config.h" // for LLVM_VERSION_MAJOR
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "spirv-binary-utils"
 
 using namespace mlir;
 
@@ -68,7 +71,18 @@ void spirv::encodeStringLiteralInto(SmallVectorImpl<uint32_t> &binary,
                                     StringRef literal) {
   // We need to encode the literal and the null termination.
   auto encodingSize = literal.size() / 4 + 1;
+  auto sizeOfDataToCopy = literal.size();
+  if (encodingSize >= kMaxLiteralWordCount) {
+    // reserve one word for the null termination
+    encodingSize = kMaxLiteralWordCount - 1;
+    // do not override the last word (null termination) when copying
+    sizeOfDataToCopy = (encodingSize - 1) * 4;
+    LLVM_DEBUG(llvm::dbgs() << "Truncating string literal to max size ("
+                            << std::to_string(kMaxLiteralWordCount - 1)
+                            << "): " << literal << "\n");
+  }
   auto bufferStartSize = binary.size();
   binary.resize(bufferStartSize + encodingSize, 0);
-  std::memcpy(binary.data() + bufferStartSize, literal.data(), literal.size());
+  std::memcpy(binary.data() + bufferStartSize, literal.data(),
+              sizeOfDataToCopy);
 }

>From 89989ba67116f6801d3ddfde3cddad9b9a2b3be5 Mon Sep 17 00:00:00 2001
From: Davide Grohmann <davide.grohmann at arm.com>
Date: Fri, 6 Jun 2025 12:39:09 +0200
Subject: [PATCH 2/3] Fix review comments

Signed-off-by: Davide Grohmann <davide.grohmann at arm.com>
Change-Id: I57ece625a5621bd03fff310dc384b4ec036ffc0a
---
 .../include/mlir/Target/SPIRV/SPIRVBinaryUtils.h |  5 +++--
 mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp       | 16 ++++++++--------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/mlir/include/mlir/Target/SPIRV/SPIRVBinaryUtils.h b/mlir/include/mlir/Target/SPIRV/SPIRVBinaryUtils.h
index d3847ae3d3bb2..746515371b63b 100644
--- a/mlir/include/mlir/Target/SPIRV/SPIRVBinaryUtils.h
+++ b/mlir/include/mlir/Target/SPIRV/SPIRVBinaryUtils.h
@@ -30,10 +30,11 @@ constexpr uint32_t kMagicNumber = 0x07230203;
 /// The serializer tool ID registered to the Khronos Group
 constexpr uint32_t kGeneratorNumber = 22;
 
-// Max number of words
+/// Max number of words
+/// See https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_universal_limits
 constexpr uint32_t kMaxWordCount = 65535;
 
-// Max number of words for literal
+/// Max number of words for literal
 constexpr uint32_t kMaxLiteralWordCount = kMaxWordCount - 3;
 
 /// Appends a SPRI-V module header to `header` with the given `version` and
diff --git a/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp b/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp
index 4d4d67a012ae1..6fdd98c6a3116 100644
--- a/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp
+++ b/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp
@@ -70,18 +70,18 @@ uint32_t spirv::getPrefixedOpcode(uint32_t wordCount, spirv::Opcode opcode) {
 void spirv::encodeStringLiteralInto(SmallVectorImpl<uint32_t> &binary,
                                     StringRef literal) {
   // We need to encode the literal and the null termination.
-  auto encodingSize = literal.size() / 4 + 1;
-  auto sizeOfDataToCopy = literal.size();
+  size_t encodingSize = literal.size() / 4 + 1;
+  size_t sizeOfDataToCopy = literal.size();
   if (encodingSize >= kMaxLiteralWordCount) {
-    // reserve one word for the null termination
+    // Reserve one word for the null termination
     encodingSize = kMaxLiteralWordCount - 1;
-    // do not override the last word (null termination) when copying
+    // Do not override the last word (null termination) when copying
     sizeOfDataToCopy = (encodingSize - 1) * 4;
-    LLVM_DEBUG(llvm::dbgs() << "Truncating string literal to max size ("
-                            << std::to_string(kMaxLiteralWordCount - 1)
-                            << "): " << literal << "\n");
+    LLVM_DEBUG(llvm::dbgs()
+               << "Truncating string literal to max size ("
+               << (kMaxLiteralWordCount - 1) << "): " << literal << "\n");
   }
-  auto bufferStartSize = binary.size();
+  size_t bufferStartSize = binary.size();
   binary.resize(bufferStartSize + encodingSize, 0);
   std::memcpy(binary.data() + bufferStartSize, literal.data(),
               sizeOfDataToCopy);

>From fefa194df957aa95b4f4518781c0a1870fd2a597 Mon Sep 17 00:00:00 2001
From: Davide Grohmann <davide.grohmann at arm.com>
Date: Tue, 10 Jun 2025 09:10:44 +0200
Subject: [PATCH 3/3] Add missing full stop marks

Signed-off-by: Davide Grohmann <davide.grohmann at arm.com>
Change-Id: I8b5bed77d4ac38a4b4735d123c99a1a687a64ec7
---
 mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp b/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp
index 6fdd98c6a3116..0ec468d4c1665 100644
--- a/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp
+++ b/mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp
@@ -73,9 +73,9 @@ void spirv::encodeStringLiteralInto(SmallVectorImpl<uint32_t> &binary,
   size_t encodingSize = literal.size() / 4 + 1;
   size_t sizeOfDataToCopy = literal.size();
   if (encodingSize >= kMaxLiteralWordCount) {
-    // Reserve one word for the null termination
+    // Reserve one word for the null termination.
     encodingSize = kMaxLiteralWordCount - 1;
-    // Do not override the last word (null termination) when copying
+    // Do not override the last word (null termination) when copying.
     sizeOfDataToCopy = (encodingSize - 1) * 4;
     LLVM_DEBUG(llvm::dbgs()
                << "Truncating string literal to max size ("



More information about the Mlir-commits mailing list