[llvm] 3673cc7 - [llvm-rc] Don't interpret integer literals as octal numbers in rc.exe mode (#166915)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 8 12:24:32 PST 2025
Author: Martin Storsjö
Date: 2025-11-08T22:24:29+02:00
New Revision: 3673cc7a4222c6b60d8bb287ca048efa37f61e3b
URL: https://github.com/llvm/llvm-project/commit/3673cc7a4222c6b60d8bb287ca048efa37f61e3b
DIFF: https://github.com/llvm/llvm-project/commit/3673cc7a4222c6b60d8bb287ca048efa37f61e3b.diff
LOG: [llvm-rc] Don't interpret integer literals as octal numbers in rc.exe mode (#166915)
It turns out that rc.exe doesn't interpret integer literals as octal
numbers - but GNU windres does. Previously, llvm-rc did interpret them
as octal.
Fix the issue by stripping away the leading zeros during tokenization.
The alternative (which would be somewhat cleaner, as visible in
tokenizer.test) would be to retain them in the RCToken object, but strip
them out before calling
StringRef::getAsInteger. Alternatively to handle the radix detection
locally in llvm-rc code and not rely on getAsInteger to autodetect it.
Both of those solutions require propagating the IsWindres flag so that
it is available within RCToken, or at least when calling
RCToken::intValue().
Fixes: https://github.com/llvm/llvm-project/issues/144723
Added:
llvm/test/tools/llvm-rc/Inputs/octal-in-range.rc
llvm/test/tools/llvm-rc/Inputs/octal-out-of-range.rc
llvm/test/tools/llvm-rc/octal.test
Modified:
llvm/test/tools/llvm-rc/Inputs/tokens.rc
llvm/test/tools/llvm-rc/tokenizer.test
llvm/tools/llvm-rc/ResourceScriptToken.cpp
llvm/tools/llvm-rc/ResourceScriptToken.h
llvm/tools/llvm-rc/ResourceScriptTokenList.def
llvm/tools/llvm-rc/llvm-rc.cpp
Removed:
################################################################################
diff --git a/llvm/test/tools/llvm-rc/Inputs/octal-in-range.rc b/llvm/test/tools/llvm-rc/Inputs/octal-in-range.rc
new file mode 100644
index 0000000000000..8327ef9be9f5c
--- /dev/null
+++ b/llvm/test/tools/llvm-rc/Inputs/octal-in-range.rc
@@ -0,0 +1,4 @@
+1 VERSIONINFO
+FILEVERSION 0010,0010,0010,0010
+BEGIN
+END
diff --git a/llvm/test/tools/llvm-rc/Inputs/octal-out-of-range.rc b/llvm/test/tools/llvm-rc/Inputs/octal-out-of-range.rc
new file mode 100644
index 0000000000000..ce520f245a48d
--- /dev/null
+++ b/llvm/test/tools/llvm-rc/Inputs/octal-out-of-range.rc
@@ -0,0 +1,4 @@
+1 VERSIONINFO
+FILEVERSION 9,08,09,1
+BEGIN
+END
diff --git a/llvm/test/tools/llvm-rc/Inputs/tokens.rc b/llvm/test/tools/llvm-rc/Inputs/tokens.rc
index 20f77912477d9..caf01aeff45fe 100644
--- a/llvm/test/tools/llvm-rc/Inputs/tokens.rc
+++ b/llvm/test/tools/llvm-rc/Inputs/tokens.rc
@@ -1,4 +1,4 @@
-1 + 2 - 3214L & 0x120894 032173 2|&~+(-7){0xabcdef 0xABCDEFl} Begin End
+1 + 2 - 3214L & 0x120894 032173 -0042 009 2|&~+(-7){0xabcdef 0xABCDEFl} Begin End
1*3/4
He11o LLVM
identifier-with-dashes
diff --git a/llvm/test/tools/llvm-rc/octal.test b/llvm/test/tools/llvm-rc/octal.test
new file mode 100644
index 0000000000000..686c1fcf1608e
--- /dev/null
+++ b/llvm/test/tools/llvm-rc/octal.test
@@ -0,0 +1,38 @@
+; RUN: llvm-rc -no-preprocess /FO %t.in-range-rc.res -- %p/Inputs/octal-in-range.rc
+; RUN: llvm-readobj %t.in-range-rc.res | FileCheck %s --check-prefix=IN-RANGE-RC
+; RUN: llvm-windres --no-preprocess %p/Inputs/octal-in-range.rc %t.in-range-windres.res
+; RUN: llvm-readobj %t.in-range-windres.res | FileCheck %s --check-prefix=IN-RANGE-WINDRES
+
+; IN-RANGE-RC: Data: (
+; IN-RANGE-RC-NEXT: 0000: 5C003400 00005600 53005F00 56004500 |\.4...V.S._.V.E.|
+; IN-RANGE-RC-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.|
+; IN-RANGE-RC-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............|
+; IN-RANGE-RC-NEXT: 0030: 0A000A00 0A000A00 00000000 00000000 |................|
+; IN-RANGE-RC-NEXT: 0040: 00000000 00000000 00000000 00000000 |................|
+; IN-RANGE-RC-NEXT: 0050: 00000000 00000000 00000000 |............|
+; IN-RANGE-RC-NEXT: )
+
+; IN-RANGE-WINDRES: Data: (
+; IN-RANGE-WINDRES-NEXT: 0000: 5C003400 00005600 53005F00 56004500 |\.4...V.S._.V.E.|
+; IN-RANGE-WINDRES-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.|
+; IN-RANGE-WINDRES-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............|
+; IN-RANGE-WINDRES-NEXT: 0030: 08000800 08000800 00000000 00000000 |................|
+; IN-RANGE-WINDRES-NEXT: 0040: 00000000 00000000 00000000 00000000 |................|
+; IN-RANGE-WINDRES-NEXT: 0050: 00000000 00000000 00000000 |............|
+; IN-RANGE-WINDRES-NEXT: )
+
+; RUN: llvm-rc -no-preprocess /FO %t.out-of-range-rc.res -- %p/Inputs/octal-out-of-range.rc
+; RUN: llvm-readobj %t.out-of-range-rc.res | FileCheck %s --check-prefix=OUT-OF-RANGE-RC
+; RUN: not llvm-windres --no-preprocess %p/Inputs/octal-out-of-range.rc %t.out-of-range-windres.res 2>&1 | FileCheck %s --check-prefix OUT-OF-RANGE-WINDRES
+
+; OUT-OF-RANGE-RC: Data: (
+; OUT-OF-RANGE-RC-NEXT: 0000: 5C003400 00005600 53005F00 56004500 |\.4...V.S._.V.E.|
+; OUT-OF-RANGE-RC-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.|
+; OUT-OF-RANGE-RC-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............|
+; OUT-OF-RANGE-RC-NEXT: 0030: 08000900 01000900 00000000 00000000 |................|
+; OUT-OF-RANGE-RC-NEXT: 0040: 00000000 00000000 00000000 00000000 |................|
+; OUT-OF-RANGE-RC-NEXT: 0050: 00000000 00000000 00000000 |............|
+; OUT-OF-RANGE-RC-NEXT: )
+
+
+; OUT-OF-RANGE-WINDRES: llvm-rc: Error parsing file: Integer invalid or too large: 08
diff --git a/llvm/test/tools/llvm-rc/tokenizer.test b/llvm/test/tools/llvm-rc/tokenizer.test
index 3062e2bf64629..953b0ca8c1b57 100644
--- a/llvm/test/tools/llvm-rc/tokenizer.test
+++ b/llvm/test/tools/llvm-rc/tokenizer.test
@@ -9,7 +9,10 @@
; CHECK-NEXT: Int: 3214L; int value = 3214
; CHECK-NEXT: Amp: &
; CHECK-NEXT: Int: 0x120894; int value = 1181844
-; CHECK-NEXT: Int: 032173; int value = 13435
+; CHECK-NEXT: Int: 32173; int value = 32173
+; CHECK-NEXT: Minus: -
+; CHECK-NEXT: Int: 42; int value = 42
+; CHECK-NEXT: Int: 9; int value = 9
; CHECK-NEXT: Int: 2; int value = 2
; CHECK-NEXT: Pipe: |
; CHECK-NEXT: Amp: &
diff --git a/llvm/tools/llvm-rc/ResourceScriptToken.cpp b/llvm/tools/llvm-rc/ResourceScriptToken.cpp
index 0070037e63e6a..046a1bf78daef 100644
--- a/llvm/tools/llvm-rc/ResourceScriptToken.cpp
+++ b/llvm/tools/llvm-rc/ResourceScriptToken.cpp
@@ -26,11 +26,11 @@ using namespace llvm;
using Kind = RCToken::Kind;
// Checks if Representation is a correct description of an RC integer.
-// It should be a 32-bit unsigned integer, either decimal, octal (0[0-7]+),
-// or hexadecimal (0x[0-9a-f]+). It might be followed by a single 'L'
-// character (that is the difference between our representation and
-// StringRef's one). If Representation is correct, 'true' is returned and
-// the return value is put back in Num.
+// It should be a 32-bit unsigned integer, either decimal or hexadecimal
+// (0x[0-9a-f]+). For Windres mode, it can also be octal (0[0-7]+).
+// It might be followed by a single 'L' character (that is the difference
+// between our representation and StringRef's one). If Representation is
+// correct, 'true' is returned and the return value is put back in Num.
static bool rcGetAsInteger(StringRef Representation, uint32_t &Num) {
size_t Length = Representation.size();
if (Length == 0)
@@ -95,7 +95,8 @@ namespace {
class Tokenizer {
public:
- Tokenizer(StringRef Input) : Data(Input), DataLength(Input.size()), Pos(0) {}
+ Tokenizer(StringRef Input, bool IsWindres)
+ : Data(Input), DataLength(Input.size()), Pos(0), IsWindres(IsWindres) {}
Expected<std::vector<RCToken>> run();
@@ -128,6 +129,7 @@ class Tokenizer {
// character.
bool canStartInt() const;
bool canContinueInt() const;
+ void trimIntString(StringRef &Str) const;
bool canStartString() const;
@@ -153,6 +155,7 @@ class Tokenizer {
StringRef Data;
size_t DataLength, Pos;
+ bool IsWindres;
};
void Tokenizer::skipCurrentLine() {
@@ -187,7 +190,12 @@ Expected<std::vector<RCToken>> Tokenizer::run() {
if (TokenKind == Kind::LineComment || TokenKind == Kind::StartComment)
continue;
- RCToken Token(TokenKind, Data.take_front(Pos).drop_front(TokenStart));
+ StringRef Contents = Data.take_front(Pos).drop_front(TokenStart);
+
+ if (TokenKind == Kind::Int)
+ trimIntString(Contents);
+
+ RCToken Token(TokenKind, Contents);
if (TokenKind == Kind::Identifier) {
processIdentifier(Token);
} else if (TokenKind == Kind::Int) {
@@ -366,12 +374,30 @@ void Tokenizer::processIdentifier(RCToken &Token) const {
Token = RCToken(Kind::BlockEnd, Name);
}
+void Tokenizer::trimIntString(StringRef &Str) const {
+ if (!IsWindres) {
+ // For compatibility with rc.exe, strip leading zeros that make the
+ // integer literal interpreted as octal.
+ //
+ // We do rely on Stringref::getAsInteger for autodetecting between
+ // decimal and hexadecimal literals, but we want to avoid interpreting
+ // literals as octal.
+ //
+ // This omits the leading zeros from the RCToken's value string entirely,
+ // which also has a visible effect when dumping the tokenizer output.
+ // Alternatively, we could store the IsWindres flag in RCToken and defer
+ // the trimming to RCToken::intValue.
+ while (Str.size() >= 2 && Str[0] == '0' && std::isdigit(Str[1]))
+ Str = Str.drop_front(1);
+ }
+}
+
} // anonymous namespace
namespace llvm {
-Expected<std::vector<RCToken>> tokenizeRC(StringRef Input) {
- return Tokenizer(Input).run();
+Expected<std::vector<RCToken>> tokenizeRC(StringRef Input, bool IsWindres) {
+ return Tokenizer(Input, IsWindres).run();
}
} // namespace llvm
diff --git a/llvm/tools/llvm-rc/ResourceScriptToken.h b/llvm/tools/llvm-rc/ResourceScriptToken.h
index 3dcdfafd2d576..50ef8e4b00f53 100644
--- a/llvm/tools/llvm-rc/ResourceScriptToken.h
+++ b/llvm/tools/llvm-rc/ResourceScriptToken.h
@@ -76,7 +76,7 @@ class RCToken {
// Tokens returned by this function hold only references to the parts
// of the Input. Memory buffer containing Input cannot be freed,
// modified or reallocated.
-Expected<std::vector<RCToken>> tokenizeRC(StringRef Input);
+Expected<std::vector<RCToken>> tokenizeRC(StringRef Input, bool IsWindres);
} // namespace llvm
diff --git a/llvm/tools/llvm-rc/ResourceScriptTokenList.def b/llvm/tools/llvm-rc/ResourceScriptTokenList.def
index 6ee13b2815d35..98af23c649577 100644
--- a/llvm/tools/llvm-rc/ResourceScriptTokenList.def
+++ b/llvm/tools/llvm-rc/ResourceScriptTokenList.def
@@ -14,7 +14,7 @@
// Long tokens. They might consist of more than one character.
TOKEN(Invalid) // Invalid token. Should not occur in a valid script.
-TOKEN(Int) // Integer (decimal, octal or hexadecimal).
+TOKEN(Int) // Integer (decimal or hexadecimal, and possibly octal for windres).
TOKEN(String) // String value.
TOKEN(Identifier) // Script identifier (resource name or type).
TOKEN(LineComment) // Beginning of single-line comment.
diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp
index f623342366515..38bf03f51227b 100644
--- a/llvm/tools/llvm-rc/llvm-rc.cpp
+++ b/llvm/tools/llvm-rc/llvm-rc.cpp
@@ -619,7 +619,8 @@ void doRc(std::string Src, std::string Dest, RcOptions &Opts,
StringRef Contents = FileContents->getBuffer();
std::string FilteredContents = filterCppOutput(Contents);
- std::vector<RCToken> Tokens = ExitOnErr(tokenizeRC(FilteredContents));
+ std::vector<RCToken> Tokens =
+ ExitOnErr(tokenizeRC(FilteredContents, Opts.IsWindres));
if (Opts.BeVerbose) {
const Twine TokenNames[] = {
More information about the llvm-commits mailing list