[llvm] r190423 - Add a getenv() wrapper that works with multibyte environment variables.

Rui Ueyama ruiu at google.com
Tue Sep 10 12:45:51 PDT 2013


Author: ruiu
Date: Tue Sep 10 14:45:51 2013
New Revision: 190423

URL: http://llvm.org/viewvc/llvm-project?rev=190423&view=rev
Log:
Add a getenv() wrapper that works with multibyte environment variables.

On Windows, the character encoding of a multibyte environment variable varies
depending on system settings. I think the only reliable way to handle this is
to use GetEnvironmentVariableW().

GetEnvironmentVariableW() works on wchar_t strings, which on Windows are UTF-16
strings. That's not ideal because we use UTF-8 as the internal encoding in LLVM.
This patch defines a wrapper function around GetEnvironmentVariableW() that
takes and returns UTF-8 strings.

On Unix, the wrapper function does not do any conversion and just forwards its
argument to getenv().

Differential Revision: http://llvm-reviews.chandlerc.com/D1612

Modified:
    llvm/trunk/include/llvm/Support/Process.h
    llvm/trunk/lib/Support/Unix/Process.inc
    llvm/trunk/lib/Support/Windows/Path.inc
    llvm/trunk/lib/Support/Windows/Process.inc
    llvm/trunk/lib/Support/Windows/Windows.h
    llvm/trunk/unittests/Support/ProcessTest.cpp

Modified: llvm/trunk/include/llvm/Support/Process.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/Process.h?rev=190423&r1=190422&r2=190423&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/Process.h (original)
+++ llvm/trunk/include/llvm/Support/Process.h Tue Sep 10 14:45:51 2013
@@ -25,11 +25,14 @@
 #ifndef LLVM_SUPPORT_PROCESS_H
 #define LLVM_SUPPORT_PROCESS_H
 
+#include "llvm/ADT/Optional.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/TimeValue.h"
 
 namespace llvm {
+class StringRef;
+
 namespace sys {
 
 class self_process;
@@ -161,6 +164,10 @@ public:
   /// @brief Prevent core file generation.
   static void PreventCoreFiles();
 
+  /// This function returns the value of the environment variable \p name as a
+  /// UTF-8 string. \p name is assumed to be in UTF-8 encoding too.
+  static Optional<std::string> GetEnv(StringRef name);
+
   /// This function determines if the standard input is connected directly
   /// to a user's input (keyboard probably), rather than coming from a file
   /// or pipe.

Modified: llvm/trunk/lib/Support/Unix/Process.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Unix/Process.inc?rev=190423&r1=190422&r2=190423&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Unix/Process.inc (original)
+++ llvm/trunk/lib/Support/Unix/Process.inc Tue Sep 10 14:45:51 2013
@@ -13,6 +13,7 @@
 
 #include "Unix.h"
 #include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/MutexGuard.h"
 #include "llvm/Support/TimeValue.h"
@@ -181,6 +182,14 @@ void Process::PreventCoreFiles() {
 #endif
 }
 
+/// Looks up the environment variable \p Name with getenv() and returns a copy
+/// of its value, or None when getenv() reports no such variable. No encoding
+/// conversion is performed.
+Optional<std::string> Process::GetEnv(StringRef Name) {
+  // getenv() needs a null-terminated key, so materialize the name first.
+  const std::string Key = Name.str();
+  if (const char *Value = ::getenv(Key.c_str()))
+    return std::string(Value);
+  return None;
+}
+
 bool Process::StandardInIsUserInput() {
   return FileDescriptorIsDisplayed(STDIN_FILENO);
 }

Modified: llvm/trunk/lib/Support/Windows/Path.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Windows/Path.inc?rev=190423&r1=190422&r2=190423&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Windows/Path.inc (original)
+++ llvm/trunk/lib/Support/Windows/Path.inc Tue Sep 10 14:45:51 2013
@@ -37,6 +37,9 @@ typedef int errno_t;
 
 using namespace llvm;
 
+using llvm::sys::windows::UTF8ToUTF16;
+using llvm::sys::windows::UTF16ToUTF8;
+
 namespace {
   typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)(
     /*__in*/ LPCWSTR lpSymlinkFileName,
@@ -47,61 +50,6 @@ namespace {
     ::GetProcAddress(::GetModuleHandleA("kernel32.dll"),
                      "CreateSymbolicLinkW"));
 
-  error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16) {
-    int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
-                                    utf8.begin(), utf8.size(),
-                                    utf16.begin(), 0);
-
-    if (len == 0)
-      return windows_error(::GetLastError());
-
-    utf16.reserve(len + 1);
-    utf16.set_size(len);
-
-    len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
-                                    utf8.begin(), utf8.size(),
-                                    utf16.begin(), utf16.size());
-
-    if (len == 0)
-      return windows_error(::GetLastError());
-
-    // Make utf16 null terminated.
-    utf16.push_back(0);
-    utf16.pop_back();
-
-    return error_code::success();
-  }
-
-  error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
-                               SmallVectorImpl<char> &utf8) {
-    // Get length.
-    int len = ::WideCharToMultiByte(CP_UTF8, 0,
-                                    utf16, utf16_len,
-                                    utf8.begin(), 0,
-                                    NULL, NULL);
-
-    if (len == 0)
-      return windows_error(::GetLastError());
-
-    utf8.reserve(len);
-    utf8.set_size(len);
-
-    // Now do the actual conversion.
-    len = ::WideCharToMultiByte(CP_UTF8, 0,
-                                utf16, utf16_len,
-                                utf8.data(), utf8.size(),
-                                NULL, NULL);
-
-    if (len == 0)
-      return windows_error(::GetLastError());
-
-    // Make utf8 null terminated.
-    utf8.push_back(0);
-    utf8.pop_back();
-
-    return error_code::success();
-  }
-
   error_code TempDir(SmallVectorImpl<wchar_t> &result) {
   retry_temp_dir:
     DWORD len = ::GetTempPathW(result.capacity(), result.begin());
@@ -1092,7 +1040,64 @@ error_code openFileForWrite(const Twine
   ResultFD = FD;
   return error_code::success();
 }
-
 } // end namespace fs
+
+namespace windows {
+/// Converts the UTF-8 string \p utf8 to UTF-16 and stores it in \p utf16.
+///
+/// On success \p utf16 holds the converted code units and a terminating null
+/// is written just past its end (it is not counted in utf16.size()). An empty
+/// input converts to an empty result. If the Windows conversion call fails,
+/// the corresponding windows_error is returned.
+llvm::error_code UTF8ToUTF16(llvm::StringRef utf8,
+                             llvm::SmallVectorImpl<wchar_t> &utf16) {
+  if (utf8.empty()) {
+    // MultiByteToWideChar() fails when given a zero-length source, so the
+    // empty string is handled here instead of being reported as an error.
+    utf16.clear();
+  } else {
+    // First call: query the number of UTF-16 code units required.
+    int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+                                    utf8.begin(), utf8.size(),
+                                    utf16.begin(), 0);
+
+    if (len == 0)
+      return llvm::windows_error(::GetLastError());
+
+    // Reserve one extra slot for the terminating null added below.
+    utf16.reserve(len + 1);
+    utf16.set_size(len);
+
+    // Second call: perform the conversion into the sized buffer.
+    len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+                                utf8.begin(), utf8.size(),
+                                utf16.begin(), utf16.size());
+
+    if (len == 0)
+      return llvm::windows_error(::GetLastError());
+  }
+
+  // Make utf16 null terminated.
+  utf16.push_back(0);
+  utf16.pop_back();
+
+  return llvm::error_code::success();
+}
+
+/// Converts the \p utf16_len UTF-16 code units starting at \p utf16 to UTF-8
+/// and stores the result in \p utf8.
+///
+/// On success \p utf8 holds the converted bytes and a terminating null is
+/// written just past its end (it is not counted in utf8.size()). A zero-length
+/// input converts to an empty result. If the Windows conversion call fails,
+/// the corresponding windows_error is returned.
+llvm::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+                             llvm::SmallVectorImpl<char> &utf8) {
+  if (utf16_len == 0) {
+    // WideCharToMultiByte() fails when given a zero-length source, so the
+    // empty string is handled here instead of being reported as an error.
+    utf8.clear();
+  } else {
+    // Get length.
+    int len = ::WideCharToMultiByte(CP_UTF8, 0,
+                                    utf16, utf16_len,
+                                    utf8.begin(), 0,
+                                    NULL, NULL);
+
+    if (len == 0)
+      return llvm::windows_error(::GetLastError());
+
+    utf8.reserve(len);
+    utf8.set_size(len);
+
+    // Now do the actual conversion.
+    len = ::WideCharToMultiByte(CP_UTF8, 0,
+                                utf16, utf16_len,
+                                utf8.data(), utf8.size(),
+                                NULL, NULL);
+
+    if (len == 0)
+      return llvm::windows_error(::GetLastError());
+  }
+
+  // Make utf8 null terminated.
+  utf8.push_back(0);
+  utf8.pop_back();
+
+  return llvm::error_code::success();
+}
+} // end namespace windows
 } // end namespace sys
 } // end namespace llvm

Modified: llvm/trunk/lib/Support/Windows/Process.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Windows/Process.inc?rev=190423&r1=190422&r2=190423&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Windows/Process.inc (original)
+++ llvm/trunk/lib/Support/Windows/Process.inc Tue Sep 10 14:45:51 2013
@@ -140,6 +140,36 @@ void Process::PreventCoreFiles() {
                SEM_NOOPENFILEERRORBOX);
 }
 
+/// Returns the environment variable \arg Name's value as a string encoded in
+/// UTF-8. \arg Name is assumed to be in UTF-8 encoding.
+///
+/// Returns None if \arg Name cannot be converted to UTF-16, if the lookup
+/// fails (for example because the variable is not set), or if the value cannot
+/// be converted to UTF-8. A variable that is set to the empty string yields an
+/// empty string, as getenv() does on Unix.
+Optional<std::string> Process::GetEnv(StringRef Name) {
+  // Convert the argument to UTF-16 to pass it to GetEnvironmentVariableW().
+  SmallVector<wchar_t, 128> NameUTF16;
+  if (error_code ec = windows::UTF8ToUTF16(Name, NameUTF16))
+    return None;
+
+  // Environment variable can be encoded in non-UTF8 encoding, and there's no
+  // way to know what the encoding is. The only reliable way to look up
+  // multibyte environment variable is to use GetEnvironmentVariableW().
+  std::vector<wchar_t> Buf(16);
+  size_t Size = 0;
+  for (;;) {
+    // A return value of 0 means either "lookup failed" or "value is empty".
+    // Reset the last-error code so the two can be told apart afterwards.
+    ::SetLastError(NO_ERROR);
+    Size = ::GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.size());
+    if (Size < Buf.size())
+      break;
+    // The buffer was too small; Size is the required size. Try again with a
+    // larger buffer.
+    Buf.resize(Size + 1);
+  }
+  if (Size == 0) {
+    // A failed lookup (e.g. ERROR_ENVVAR_NOT_FOUND) leaves an error code
+    // behind; a variable whose value is empty does not.
+    if (::GetLastError() != NO_ERROR)
+      return None;
+    return std::string();
+  }
+
+  // Convert the result from UTF-16 to UTF-8.
+  SmallVector<char, 128> Res;
+  if (error_code ec = windows::UTF16ToUTF8(&Buf[0], Size, Res))
+    return None;
+  // Use the explicit length rather than relying on the trailing null.
+  return std::string(Res.data(), Res.size());
+}
+
 bool Process::StandardInIsUserInput() {
   return FileDescriptorIsDisplayed(0);
 }

Modified: llvm/trunk/lib/Support/Windows/Windows.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Windows/Windows.h?rev=190423&r1=190422&r2=190423&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Windows/Windows.h (original)
+++ llvm/trunk/lib/Support/Windows/Windows.h Tue Sep 10 14:45:51 2013
@@ -24,13 +24,17 @@
 #define _WIN32_IE    0x0600 // MinGW at it again.
 #define WIN32_LEAN_AND_MEAN
 
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Config/config.h" // Get build system configuration settings
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/system_error.h"
 #include <windows.h>
 #include <wincrypt.h>
 #include <shlobj.h>
 #include <cassert>
 #include <string>
+#include <vector>
 
 inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
   if (!ErrMsg)
@@ -148,4 +152,13 @@ c_str(SmallVectorImpl<T> &str) {
   str.pop_back();
   return str.data();
 }
+
+namespace sys {
+namespace windows {
+error_code UTF8ToUTF16(StringRef utf8,
+                       SmallVectorImpl<wchar_t> &utf16);
+error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+                       SmallVectorImpl<char> &utf8);
+} // end namespace windows
+} // end namespace sys
 } // end namespace llvm.

Modified: llvm/trunk/unittests/Support/ProcessTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/ProcessTest.cpp?rev=190423&r1=190422&r2=190423&view=diff
==============================================================================
--- llvm/trunk/unittests/Support/ProcessTest.cpp (original)
+++ llvm/trunk/unittests/Support/ProcessTest.cpp Tue Sep 10 14:45:51 2013
@@ -39,4 +39,32 @@ TEST(ProcessTest, SelfProcess) {
   EXPECT_GT(TimeValue::MaxTime, process::get_self()->get_wall_time());
 }
 
+#ifdef LLVM_ON_WIN32
+#define setenv(name, var, ignore) _putenv_s(name, var)
+#endif
+
+#if HAVE_SETENV || defined(LLVM_ON_WIN32)
+// Checks that GetEnv() returns the value of a variable set by this process.
+TEST(ProcessTest, Basic) {
+  setenv("__LLVM_TEST_ENVIRON_VAR__", "abc", true);
+  Optional<std::string> val(Process::GetEnv("__LLVM_TEST_ENVIRON_VAR__"));
+  // Must be a fatal assertion: the next line dereferences val, which is only
+  // valid when it holds a value.
+  ASSERT_TRUE(val.hasValue());
+  EXPECT_STREQ("abc", val->c_str());
+}
+
+// Checks that GetEnv() reports a variable that is not set as having no value.
+TEST(ProcessTest, None) {
+  const Optional<std::string> Missing =
+      Process::GetEnv("__LLVM_TEST_ENVIRON_NO_SUCH_VAR__");
+  EXPECT_FALSE(Missing.hasValue());
+}
+#endif
+
+#ifdef LLVM_ON_WIN32
+// Checks that a value set through the wide-character Windows API is returned
+// by GetEnv() as the matching narrow string.
+TEST(ProcessTest, Wchar) {
+  SetEnvironmentVariableW(L"__LLVM_TEST_ENVIRON_VAR__", L"abcdefghijklmnopqrs");
+  Optional<std::string> val(Process::GetEnv("__LLVM_TEST_ENVIRON_VAR__"));
+  // Must be a fatal assertion: the next line dereferences val, which is only
+  // valid when it holds a value.
+  ASSERT_TRUE(val.hasValue());
+  EXPECT_STREQ("abcdefghijklmnopqrs", val->c_str());
+}
+#endif
+
 } // end anonymous namespace





More information about the llvm-commits mailing list