[llvm-commits] [llvm] r154235 - in /llvm/trunk: include/llvm/MC/MCAsmInfo.h lib/MC/MCAsmInfo.cpp lib/Target/Mangler.cpp test/CodeGen/X86/utf8.ll

Sean Hunt scshunt at csclub.uwaterloo.ca
Fri Apr 6 17:37:54 PDT 2012


Author: coppro
Date: Fri Apr  6 19:37:53 2012
New Revision: 154235

URL: http://llvm.org/viewvc/llvm-project?rev=154235&view=rev
Log:
Output UTF-8-encoded characters as identifier characters into assembly
by default.

This behaviour is configurable in MCAsmInfo. I've decided to turn it on
by default in the (possibly optimistic) hope that most assemblers are
reasonably sane. If this proves to be a problem, switching the default
to off seems reasonable.

I'm not sure if this is the most appropriate place for the test, but it
seemed good to make sure it was tested somewhere.

Added:
    llvm/trunk/test/CodeGen/X86/utf8.ll
Modified:
    llvm/trunk/include/llvm/MC/MCAsmInfo.h
    llvm/trunk/lib/MC/MCAsmInfo.cpp
    llvm/trunk/lib/Target/Mangler.cpp

Modified: llvm/trunk/include/llvm/MC/MCAsmInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmInfo.h?rev=154235&r1=154234&r2=154235&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCAsmInfo.h (original)
+++ llvm/trunk/include/llvm/MC/MCAsmInfo.h Fri Apr  6 19:37:53 2012
@@ -143,6 +143,10 @@
     /// symbol names.  This defaults to true.
     bool AllowPeriodsInName;
 
+    /// AllowUTF8 - This is true if the assembler accepts UTF-8 input.
+    // FIXME: Make this a more general encoding setting?
+    bool AllowUTF8;
+
     //===--- Data Emission Directives -------------------------------------===//
 
     /// ZeroDirective - this should be set to the directive used to get some
@@ -485,6 +489,9 @@
     bool doesAllowPeriodsInName() const {
       return AllowPeriodsInName;
     }
+    bool doesAllowUTF8() const {
+      return AllowUTF8;
+    }
     const char *getZeroDirective() const {
       return ZeroDirective;
     }

Modified: llvm/trunk/lib/MC/MCAsmInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAsmInfo.cpp?rev=154235&r1=154234&r2=154235&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCAsmInfo.cpp (original)
+++ llvm/trunk/lib/MC/MCAsmInfo.cpp Fri Apr  6 19:37:53 2012
@@ -49,6 +49,7 @@
   AllowQuotesInName = false;
   AllowNameToStartWithDigit = false;
   AllowPeriodsInName = true;
+  AllowUTF8 = true;
   ZeroDirective = "\t.zero\t";
   AsciiDirective = "\t.ascii\t";
   AscizDirective = "\t.asciz\t";

Modified: llvm/trunk/lib/Target/Mangler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mangler.cpp?rev=154235&r1=154234&r2=154235&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mangler.cpp (original)
+++ llvm/trunk/lib/Target/Mangler.cpp Fri Apr  6 19:37:53 2012
@@ -22,12 +22,13 @@
 #include "llvm/ADT/Twine.h"
 using namespace llvm;
 
-static bool isAcceptableChar(char C, bool AllowPeriod) {
+static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) {
   if ((C < 'a' || C > 'z') &&
       (C < 'A' || C > 'Z') &&
       (C < '0' || C > '9') &&
       C != '_' && C != '$' && C != '@' &&
-      !(AllowPeriod && C == '.'))
+      !(AllowPeriod && C == '.') &&
+      !(AllowUTF8 && (C & 0x80)))
     return false;
   return true;
 }
@@ -56,8 +57,9 @@
   // If any of the characters in the string is an unacceptable character, force
   // quotes.
   bool AllowPeriod = MAI.doesAllowPeriodsInName();
+  bool AllowUTF8 = MAI.doesAllowUTF8();
   for (unsigned i = 0, e = Str.size(); i != e; ++i)
-    if (!isAcceptableChar(Str[i], AllowPeriod))
+    if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
       return true;
   return false;
 }
@@ -74,8 +76,9 @@
   }
 
   bool AllowPeriod = MAI.doesAllowPeriodsInName();
+  bool AllowUTF8 = MAI.doesAllowUTF8();
   for (unsigned i = 0, e = Str.size(); i != e; ++i) {
-    if (!isAcceptableChar(Str[i], AllowPeriod))
+    if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
       MangleLetter(OutName, Str[i]);
     else
       OutName.push_back(Str[i]);

Added: llvm/trunk/test/CodeGen/X86/utf8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/utf8.ll?rev=154235&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/utf8.ll (added)
+++ llvm/trunk/test/CodeGen/X86/utf8.ll Fri Apr  6 19:37:53 2012
@@ -0,0 +1,4 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK: "iΔ",4,4
+@"i\CE\94" = common global i32 0, align 4





More information about the llvm-commits mailing list