[llvm] [llvm-windres] Change the interpretation of --preprocessor to match Binutils 2.36 (PR #75391)

Martin Storsjö via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 13 14:05:33 PST 2023


https://github.com/mstorsjo created https://github.com/llvm/llvm-project/pull/75391

Binutils 2.36 had a somewhat controversial change in how the --preprocessor option was handled in GNU windres; previously, the option was interpreted as a part of the command string, potentially containing multiple arguments (which even was hinted at in the documentation).

In Binutils 2.36, this was changed to interpret the --preprocessor argument as one argument (possibly containing spaces) pointing at the preprocessor executable.

The existing behaviour where implicit arguments like -E -xc -DRC_INVOKED are dropped if --preprocessor is specified, was kept.

This was a breaking change for some users of GNU windres, see https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=21c33bcbe36377abf01614fb1b9be439a3b6de20, https://sourceware.org/bugzilla/show_bug.cgi?id=27594, and https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=5edb8e3f5ad8d74a83fc0df7f6e4514eed0aa77f.

As multiple years have passed since, the behaviour change seems to be here to stay, and any users of the previous form of the option have been forced to avoid this construct. Thus update llvm-windres to match the new way Binutils of handling this option.

One construct for specifying the path to the preprocessor, which works both before and after binutils 2.36 (and this change in llvm-windres) is to specify options like this:

    --preprocessor path/to/executable --preprocessor-arg -E --preprocessor-arg -xc -DRC_INVOKED

>From 3f6d7d3863bfd49b974e683a7bc1e2260196689c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin at martin.st>
Date: Wed, 13 Dec 2023 13:45:18 +0200
Subject: [PATCH 1/3] [llvm-windres] Pass user preprocessor arguments before
 the input filename

If passing the windres option --preprocessor, the default arguments
"-E -xc -DRC_INVOKED" aren't passed. If these are passed explicitly
by the user via --preprocessor-arg instead, we need to make sure that
"-xc" is passed before the input filename, as this compiler/preprocessor
option only has an effect on input files that follow it.

This fixes one of the issues with llvm-windres observed in
https://github.com/msys2/MINGW-packages/pull/19157.
---
 llvm/test/tools/llvm-rc/preproc.test         | 2 +-
 llvm/test/tools/llvm-rc/windres-preproc.test | 2 +-
 llvm/tools/llvm-rc/llvm-rc.cpp               | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/tools/llvm-rc/preproc.test b/llvm/test/tools/llvm-rc/preproc.test
index f55e5434dce360..7a6d8b3db03662 100644
--- a/llvm/test/tools/llvm-rc/preproc.test
+++ b/llvm/test/tools/llvm-rc/preproc.test
@@ -1,3 +1,3 @@
 ; RUN: llvm-rc -### -i%p "-DFOO1=\"foo bar\"" -UFOO2 -D FOO3 -- %p/Inputs/empty.rc | FileCheck %s
 
-; CHECK: {{^}} "clang" "--driver-mode=gcc" "-target" "{{.*}}-pc-windows-msvc-coff" "-E" "-xc" "-DRC_INVOKED" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc" "-I" "{{.*}}" "-D" "FOO1=\"foo bar\"" "-U" "FOO2" "-D" "FOO3"{{$}}
+; CHECK: {{^}} "clang" "--driver-mode=gcc" "-target" "{{.*}}-pc-windows-msvc-coff" "-E" "-xc" "-DRC_INVOKED" "-I" "{{.*}}" "-D" "FOO1=\"foo bar\"" "-U" "FOO2" "-D" "FOO3" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
diff --git a/llvm/test/tools/llvm-rc/windres-preproc.test b/llvm/test/tools/llvm-rc/windres-preproc.test
index e55195b3a4d280..42e83ba13021c2 100644
--- a/llvm/test/tools/llvm-rc/windres-preproc.test
+++ b/llvm/test/tools/llvm-rc/windres-preproc.test
@@ -5,6 +5,6 @@
 
 ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\\\"foo bar\\\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\\\"baz baz\\\"" -DFOO5=\"bar\" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK1
 ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\"foo bar\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\"baz baz\"" "-DFOO5=bar" %p/Inputs/empty.rc %t.res --use-temp-file | FileCheck %s --check-prefix=CHECK1
-; CHECK1: {{^}} "clang" "--driver-mode=gcc" "-target" "{{.*}}-{{.*}}{{mingw32|windows-gnu}}" "-E" "-xc" "-DRC_INVOKED" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc" "-I" "{{.*}}incdir1" "-I" "{{.*}}incdir2" "-D" "FOO1=\"foo bar\"" "-U" "FOO2" "-D" "FOO3" "-DFOO4=\"baz baz\"" "-D" "FOO5=bar"{{$}}
+; CHECK1: {{^}} "clang" "--driver-mode=gcc" "-target" "{{.*}}-{{.*}}{{mingw32|windows-gnu}}" "-E" "-xc" "-DRC_INVOKED" "-I" "{{.*}}incdir1" "-I" "{{.*}}incdir2" "-D" "FOO1=\"foo bar\"" "-U" "FOO2" "-D" "FOO3" "-DFOO4=\"baz baz\"" "-D" "FOO5=bar" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
 ; RUN: llvm-windres -### --preprocessor "i686-w64-mingw32-gcc -E -DFOO=\\\"foo\\ bar\\\"" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK2
 ; CHECK2: {{^}} "i686-w64-mingw32-gcc" "-E" "-DFOO=\"foo bar\"" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp
index b955347f2a8646..43756971fc9ba0 100644
--- a/llvm/tools/llvm-rc/llvm-rc.cpp
+++ b/llvm/tools/llvm-rc/llvm-rc.cpp
@@ -253,11 +253,11 @@ void preprocess(StringRef Src, StringRef Dst, const RcOptions &Opts,
     for (const auto &S : Opts.PreprocessCmd)
       Args.push_back(S);
   }
+  for (const auto &S : Opts.PreprocessArgs)
+    Args.push_back(S);
   Args.push_back(Src);
   Args.push_back("-o");
   Args.push_back(Dst);
-  for (const auto &S : Opts.PreprocessArgs)
-    Args.push_back(S);
   if (Opts.PrintCmdAndExit || Opts.BeVerbose) {
     for (const auto &A : Args) {
       outs() << " ";

>From f95590841b4367a3863caeb36c66da988ca12773 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin at martin.st>
Date: Wed, 13 Dec 2023 14:12:49 +0200
Subject: [PATCH 2/3] [llvm-windres] Resolve the --preprocessor executable in
 $PATH

The llvm::sys::ExecuteAndWait function doesn't resolve the
file to be executed from $PATH - i.e. it is similar to execv(),
not execvp().

Due to this, specifying a --preprocessor argument to llvm-windres
only worked if it specified an absolute path to the preprocessor
executable. This was observed as one of the issues in
https://github.com/msys2/MINGW-packages/pull/19157.

Before d2fa6b694c2052cef1ddd507f6569bc84e3bbe35, this usage of
--preprocessor seemed to work, because the first argument of
Args[] was ignored and llvm-windres just executed the autodetected
clang executable regardless.

Also improve the error messages printed if preprocessing failed.
(If the preprocessor executable was started but itself returned
an error, we don't get any error string.)
---
 llvm/test/tools/llvm-rc/windres-preproc.test | 16 +++++++++++++++-
 llvm/tools/llvm-rc/llvm-rc.cpp               | 17 +++++++++++++++--
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/llvm/test/tools/llvm-rc/windres-preproc.test b/llvm/test/tools/llvm-rc/windres-preproc.test
index 42e83ba13021c2..74e888614aa2b9 100644
--- a/llvm/test/tools/llvm-rc/windres-preproc.test
+++ b/llvm/test/tools/llvm-rc/windres-preproc.test
@@ -7,4 +7,18 @@
 ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\"foo bar\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\"baz baz\"" "-DFOO5=bar" %p/Inputs/empty.rc %t.res --use-temp-file | FileCheck %s --check-prefix=CHECK1
 ; CHECK1: {{^}} "clang" "--driver-mode=gcc" "-target" "{{.*}}-{{.*}}{{mingw32|windows-gnu}}" "-E" "-xc" "-DRC_INVOKED" "-I" "{{.*}}incdir1" "-I" "{{.*}}incdir2" "-D" "FOO1=\"foo bar\"" "-U" "FOO2" "-D" "FOO3" "-DFOO4=\"baz baz\"" "-D" "FOO5=bar" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
 ; RUN: llvm-windres -### --preprocessor "i686-w64-mingw32-gcc -E -DFOO=\\\"foo\\ bar\\\"" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK2
-; CHECK2: {{^}} "i686-w64-mingw32-gcc" "-E" "-DFOO=\"foo bar\"" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
+; CHECK2: {{^}} "{{.*}}i686-w64-mingw32-gcc" "-E" "-DFOO=\"foo bar\"" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
+
+;; Test resolving the --preprocessor executable from PATH
+
+; RUN: rm -rf %t-bin/testbin
+; RUN: mkdir -p %t-bin/testbin
+; RUN: ln -s llvm-windres %t-bin/testbin/i686-w64-mingw32-gcc
+; RUN: env PATH=%t-bin/testbin llvm-windres -### --preprocessor i686-w64-mingw32-gcc --preprocessor-arg -E --preprocessor-arg -xc -DRC_INVOKED %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK3
+; CHECK3: {{^}} "{{.*}}/testbin/i686-w64-mingw32-gcc" "-E" "-xc" "-D" "RC_INVOKED" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
+
+
+;; Test error messages when unable to execute the preprocessor.
+
+; RUN: not llvm-windres --preprocessor intentionally-missing-executable %p/Inputs/empty.rc %t.res 2>&1 | FileCheck %s --check-prefix=CHECK4
+; CHECK4: llvm-rc: Preprocessing failed: Executable "intentionally-missing-executable" doesn't exist!
diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp
index 43756971fc9ba0..27fb0309e0ee54 100644
--- a/llvm/tools/llvm-rc/llvm-rc.cpp
+++ b/llvm/tools/llvm-rc/llvm-rc.cpp
@@ -248,10 +248,17 @@ void preprocess(StringRef Src, StringRef Dst, const RcOptions &Opts,
   SmallVector<StringRef, 8> Args = {
       Clang, "--driver-mode=gcc", "-target", Opts.Triple, "-E",
       "-xc", "-DRC_INVOKED"};
+  std::string PreprocessorExecutable;
   if (!Opts.PreprocessCmd.empty()) {
     Args.clear();
     for (const auto &S : Opts.PreprocessCmd)
       Args.push_back(S);
+    if (!sys::fs::can_execute(Args[0])) {
+      if (auto P = sys::findProgramByName(Args[0])) {
+        PreprocessorExecutable = *P;
+        Args[0] = PreprocessorExecutable;
+      }
+    }
   }
   for (const auto &S : Opts.PreprocessArgs)
     Args.push_back(S);
@@ -269,9 +276,15 @@ void preprocess(StringRef Src, StringRef Dst, const RcOptions &Opts,
   }
   // The llvm Support classes don't handle reading from stdout of a child
   // process; otherwise we could avoid using a temp file.
-  int Res = sys::ExecuteAndWait(Args[0], Args);
+  std::string ErrMsg;
+  int Res =
+      sys::ExecuteAndWait(Args[0], Args, /*Env=*/std::nullopt, /*Redirects=*/{},
+                          /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
   if (Res) {
-    fatalError("llvm-rc: Preprocessing failed.");
+    if (!ErrMsg.empty())
+      fatalError("llvm-rc: Preprocessing failed: " + ErrMsg);
+    else
+      fatalError("llvm-rc: Preprocessing failed.");
   }
 }
 

>From fe36e4bc38a0a54e521fc2eeb6190420ed854702 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin at martin.st>
Date: Wed, 13 Dec 2023 14:39:18 +0200
Subject: [PATCH 3/3] [llvm-windres] Change the interpretation of
 --preprocessor to match Binutils 2.36

Binutils 2.36 had a somewhat controversial change in how the
--preprocessor option was handled; previously, the option was
interpreted as a part of the command string, potentially containing
multiple arguments (which even was hinted at in the documentation).

In Binutils 2.36, this was changed to interpret the --preprocessor
argument as one argument (possibly containing spaces) pointing at
the preprocessor executable.

The existing behaviour where implicit arguments like -E -xc -DRC_INVOKED
are dropped if --preprocessor is specified, was kept.

This was a breaking change for some users of GNU windres, see
https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=21c33bcbe36377abf01614fb1b9be439a3b6de20,
https://sourceware.org/bugzilla/show_bug.cgi?id=27594, and
https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=5edb8e3f5ad8d74a83fc0df7f6e4514eed0aa77f.

As multiple years have passed since, the behaviour change seems
to be here to stay, and any users of the previous form of the
option have been forced to avoid this construct.

One construct for specifying the path to the preprocessor,
which works both before and after binutils 2.36 (and this change
in llvm-windres) is to specify options like this:

    --preprocessor path/to/executable --preprocessor-arg -E --preprocessor-arg -xc -DRC_INVOKED
---
 llvm/test/tools/llvm-rc/windres-preproc.test |  9 +++-
 llvm/tools/llvm-rc/llvm-rc.cpp               | 44 +++-----------------
 2 files changed, 12 insertions(+), 41 deletions(-)

diff --git a/llvm/test/tools/llvm-rc/windres-preproc.test b/llvm/test/tools/llvm-rc/windres-preproc.test
index 74e888614aa2b9..13f82299a074bb 100644
--- a/llvm/test/tools/llvm-rc/windres-preproc.test
+++ b/llvm/test/tools/llvm-rc/windres-preproc.test
@@ -6,8 +6,8 @@
 ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\\\"foo bar\\\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\\\"baz baz\\\"" -DFOO5=\"bar\" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK1
 ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\"foo bar\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\"baz baz\"" "-DFOO5=bar" %p/Inputs/empty.rc %t.res --use-temp-file | FileCheck %s --check-prefix=CHECK1
 ; CHECK1: {{^}} "clang" "--driver-mode=gcc" "-target" "{{.*}}-{{.*}}{{mingw32|windows-gnu}}" "-E" "-xc" "-DRC_INVOKED" "-I" "{{.*}}incdir1" "-I" "{{.*}}incdir2" "-D" "FOO1=\"foo bar\"" "-U" "FOO2" "-D" "FOO3" "-DFOO4=\"baz baz\"" "-D" "FOO5=bar" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
-; RUN: llvm-windres -### --preprocessor "i686-w64-mingw32-gcc -E -DFOO=\\\"foo\\ bar\\\"" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK2
-; CHECK2: {{^}} "{{.*}}i686-w64-mingw32-gcc" "-E" "-DFOO=\"foo bar\"" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
+; RUN: llvm-windres -### --preprocessor "i686-w64-mingw32-gcc" --preprocessor-arg -E "-DFOO=\\\"foo bar\\\"" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK2
+; CHECK2: {{^}} "{{.*}}i686-w64-mingw32-gcc" "-E" "-D" "FOO=\"foo bar\"" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
 
 ;; Test resolving the --preprocessor executable from PATH
 
@@ -22,3 +22,8 @@
 
 ; RUN: not llvm-windres --preprocessor intentionally-missing-executable %p/Inputs/empty.rc %t.res 2>&1 | FileCheck %s --check-prefix=CHECK4
 ; CHECK4: llvm-rc: Preprocessing failed: Executable "intentionally-missing-executable" doesn't exist!
+
+;; Test --preprocessor with an argument with spaces.
+
+; RUN: llvm-windres -### --preprocessor "path with spaces/gcc" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK5
+; CHECK5: {{^}} "path with spaces/gcc" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp
index 27fb0309e0ee54..78ab96492acc75 100644
--- a/llvm/tools/llvm-rc/llvm-rc.cpp
+++ b/llvm/tools/llvm-rc/llvm-rc.cpp
@@ -209,7 +209,7 @@ struct RcOptions {
   bool Preprocess = true;
   bool PrintCmdAndExit = false;
   std::string Triple;
-  std::vector<std::string> PreprocessCmd;
+  std::optional<std::string> Preprocessor;
   std::vector<std::string> PreprocessArgs;
 
   std::string InputFile;
@@ -229,7 +229,7 @@ struct RcOptions {
 void preprocess(StringRef Src, StringRef Dst, const RcOptions &Opts,
                 const char *Argv0) {
   std::string Clang;
-  if (Opts.PrintCmdAndExit || !Opts.PreprocessCmd.empty()) {
+  if (Opts.PrintCmdAndExit || Opts.Preprocessor) {
     Clang = "clang";
   } else {
     ErrorOr<std::string> ClangOrErr = findClang(Argv0, Opts.Triple);
@@ -249,10 +249,9 @@ void preprocess(StringRef Src, StringRef Dst, const RcOptions &Opts,
       Clang, "--driver-mode=gcc", "-target", Opts.Triple, "-E",
       "-xc", "-DRC_INVOKED"};
   std::string PreprocessorExecutable;
-  if (!Opts.PreprocessCmd.empty()) {
+  if (Opts.Preprocessor) {
     Args.clear();
-    for (const auto &S : Opts.PreprocessCmd)
-      Args.push_back(S);
+    Args.push_back(*Opts.Preprocessor);
     if (!sys::fs::can_execute(Args[0])) {
       if (auto P = sys::findProgramByName(Args[0])) {
         PreprocessorExecutable = *P;
@@ -342,36 +341,6 @@ std::string unescape(StringRef S) {
   return Out;
 }
 
-std::vector<std::string> unescapeSplit(StringRef S) {
-  std::vector<std::string> OutArgs;
-  std::string Out;
-  bool InQuote = false;
-  for (int I = 0, E = S.size(); I < E; I++) {
-    if (S[I] == '\\') {
-      if (I + 1 < E)
-        Out.push_back(S[++I]);
-      else
-        fatalError("Unterminated escape");
-      continue;
-    }
-    if (S[I] == '"') {
-      InQuote = !InQuote;
-      continue;
-    }
-    if (S[I] == ' ' && !InQuote) {
-      OutArgs.push_back(Out);
-      Out.clear();
-      continue;
-    }
-    Out.push_back(S[I]);
-  }
-  if (InQuote)
-    fatalError("Unterminated quote");
-  if (!Out.empty())
-    OutArgs.push_back(Out);
-  return OutArgs;
-}
-
 RcOptions parseWindresOptions(ArrayRef<const char *> ArgsArr,
                               ArrayRef<const char *> InputArgsArray,
                               std::string Prefix) {
@@ -506,11 +475,8 @@ RcOptions parseWindresOptions(ArrayRef<const char *> ArgsArr,
       break;
     }
   }
-  // TODO: If --use-temp-file is set, we shouldn't be unescaping
-  // the --preprocessor argument either, only splitting it.
   if (InputArgs.hasArg(WINDRES_preprocessor))
-    Opts.PreprocessCmd =
-        unescapeSplit(InputArgs.getLastArgValue(WINDRES_preprocessor));
+    Opts.Preprocessor = InputArgs.getLastArgValue(WINDRES_preprocessor);
 
   Opts.Params.CodePage = CpWin1252; // Different default
   if (InputArgs.hasArg(WINDRES_codepage)) {



More information about the llvm-commits mailing list