[cfe-commits] r153710 - in /cfe/trunk: lib/CodeGen/CodeGenModule.cpp test/CodeGen/darwin-string-literals.c test/CodeGen/utf16-cfstrings.c test/CodeGenObjC/2009-08-05-utf16.m

Bill Wendling isanbard at gmail.com
Thu Mar 29 17:26:18 PDT 2012


Author: void
Date: Thu Mar 29 19:26:17 2012
New Revision: 153710

URL: http://llvm.org/viewvc/llvm-project?rev=153710&view=rev
Log:
The UTF16 string referenced by a CFString should go into the __TEXT,__ustring
section. A 'normal' string will go into the __TEXT,__const section, but this
isn't good for UTF16 strings. The __ustring section allows for coalescing, among
other niceties (such as allowing the linker to easily split up strings).

Instead of outputting the UTF16 string as a series of bytes, output it as a
series of shorts. The back-end will then nicely place the UTF16 string into the
correct section, because it's a mensch.
<rdar://problem/10655949>

Added:
    cfe/trunk/test/CodeGen/utf16-cfstrings.c
Modified:
    cfe/trunk/lib/CodeGen/CodeGenModule.cpp
    cfe/trunk/test/CodeGen/darwin-string-literals.c
    cfe/trunk/test/CodeGenObjC/2009-08-05-utf16.m

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=153710&r1=153709&r2=153710&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Thu Mar 29 19:26:17 2012
@@ -1903,8 +1903,10 @@
     return Map.GetOrCreateValue(String);
   }
 
-  // Otherwise, convert the UTF8 literals into a byte string.
-  SmallVector<UTF16, 128> ToBuf(NumBytes);
+  // Otherwise, convert the UTF8 literals into a string of shorts.
+  IsUTF16 = true;
+
+  SmallVector<UTF16, 128> ToBuf(NumBytes + 1); // +1 for ending nulls.
   const UTF8 *FromPtr = (UTF8 *)String.data();
   UTF16 *ToPtr = &ToBuf[0];
 
@@ -1915,28 +1917,11 @@
   // ConvertUTF8toUTF16 returns the length in ToPtr.
   StringLength = ToPtr - &ToBuf[0];
 
-  // Render the UTF-16 string into a byte array and convert to the target byte
-  // order.
-  //
-  // FIXME: This isn't something we should need to do here.
-  SmallString<128> AsBytes;
-  AsBytes.reserve(StringLength * 2);
-  for (unsigned i = 0; i != StringLength; ++i) {
-    unsigned short Val = ToBuf[i];
-    if (TargetIsLSB) {
-      AsBytes.push_back(Val & 0xFF);
-      AsBytes.push_back(Val >> 8);
-    } else {
-      AsBytes.push_back(Val >> 8);
-      AsBytes.push_back(Val & 0xFF);
-    }
-  }
-  // Append one extra null character, the second is automatically added by our
-  // caller.
-  AsBytes.push_back(0);
-
-  IsUTF16 = true;
-  return Map.GetOrCreateValue(StringRef(AsBytes.data(), AsBytes.size()));
+  // Add an explicit null.
+  *ToPtr = 0;
+  return Map.
+    GetOrCreateValue(StringRef(reinterpret_cast<const char *>(ToBuf.data()),
+                               (StringLength + 1) * 2));
 }
 
 static llvm::StringMapEntry<llvm::Constant*> &
@@ -1990,8 +1975,15 @@
     llvm::ConstantInt::get(Ty, 0x07C8);
 
   // String pointer.
-  llvm::Constant *C = llvm::ConstantDataArray::getString(VMContext,
-                                                         Entry.getKey());
+  llvm::Constant *C = 0;
+  if (isUTF16) {
+    ArrayRef<uint16_t> Arr =
+      llvm::makeArrayRef<uint16_t>((uint16_t*)Entry.getKey().data(),
+                                   Entry.getKey().size() / 2);
+    C = llvm::ConstantDataArray::get(VMContext, Arr);
+  } else {
+    C = llvm::ConstantDataArray::getString(VMContext, Entry.getKey());
+  }
 
   llvm::GlobalValue::LinkageTypes Linkage;
   if (isUTF16)
@@ -2016,8 +2008,14 @@
     CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy);
     GV->setAlignment(Align.getQuantity());
   }
+
+  // String.
   Fields[2] = llvm::ConstantExpr::getGetElementPtr(GV, Zeros);
 
+  if (isUTF16)
+    // Cast the UTF16 string to the correct type.
+    Fields[2] = llvm::ConstantExpr::getBitCast(Fields[2], Int8PtrTy);
+
   // String length.
   Ty = getTypes().ConvertType(getContext().LongTy);
   Fields[3] = llvm::ConstantInt::get(Ty, StringLength);

Modified: cfe/trunk/test/CodeGen/darwin-string-literals.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/darwin-string-literals.c?rev=153710&r1=153709&r2=153710&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/darwin-string-literals.c (original)
+++ cfe/trunk/test/CodeGen/darwin-string-literals.c Thu Mar 29 19:26:17 2012
@@ -2,13 +2,16 @@
 
 // CHECK-LSB: @.str = private unnamed_addr constant [8 x i8] c"string0\00"
 // CHECK-LSB: @.str1 = linker_private unnamed_addr constant [8 x i8] c"string1\00"
-// CHECK-LSB: @.str2 = internal unnamed_addr constant [36 x i8] c"h\00e\00l\00l\00o\00 \00\92! \00\03& \00\90! \00w\00o\00r\00l\00d\00\00\00", align 2
+// CHECK-LSB: @.str2 = internal unnamed_addr constant [18 x i16] [i16 104, i16 101, i16 108, i16 108, i16 111, i16 32, i16 8594, i16 32, i16 9731, i16 32, i16 8592, i16 32, i16 119, i16 111, i16 114, i16 108, i16 100, i16 0], align 2
+// CHECK-LSB: @.str4 = internal unnamed_addr constant [6 x i16] [i16 116, i16 101, i16 115, i16 116, i16 8482, i16 0], align 2
+
 
 // RUN: %clang_cc1 -triple powerpc-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix MSB %s
 
 // CHECK-MSB: @.str = private unnamed_addr constant [8 x i8] c"string0\00"
 // CHECK-MSB: @.str1 = linker_private unnamed_addr constant [8 x i8] c"string1\00"
-// CHECK-MSB: @.str2 = internal unnamed_addr constant [36 x i8] c"\00h\00e\00l\00l\00o\00 !\92\00 &\03\00 !\90\00 \00w\00o\00r\00l\00d\00\00", align 2
+// CHECK-MSB: @.str2 = internal unnamed_addr constant [18 x i16] [i16 104, i16 101, i16 108, i16 108, i16 111, i16 32, i16 8594, i16 32, i16 9731, i16 32, i16 8592, i16 32, i16 119, i16 111, i16 114, i16 108, i16 100, i16 0], align 2
+// CHECK-MSB: @.str4 = internal unnamed_addr constant [6 x i16] [i16 116, i16 101, i16 115, i16 116, i16 8482, i16 0], align 2
 
 const char *g0 = "string0";
 const void *g1 = __builtin___CFStringMakeConstantString("string1");

Added: cfe/trunk/test/CodeGen/utf16-cfstrings.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/utf16-cfstrings.c?rev=153710&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/utf16-cfstrings.c (added)
+++ cfe/trunk/test/CodeGen/utf16-cfstrings.c Thu Mar 29 19:26:17 2012
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s
+// <rdar://problem/10655949>
+
+// CHECK: @.str = internal unnamed_addr constant [9 x i16] [i16 252, i16 98, i16 101, i16 114, i16 104, i16 117, i16 110, i16 100, i16 0], align 2
+
+#define CFSTR __builtin___CFStringMakeConstantString
+
+void foo() {
+  CFSTR("überhund");
+}

Modified: cfe/trunk/test/CodeGenObjC/2009-08-05-utf16.m
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenObjC/2009-08-05-utf16.m?rev=153710&r1=153709&r2=153710&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenObjC/2009-08-05-utf16.m (original)
+++ cfe/trunk/test/CodeGenObjC/2009-08-05-utf16.m Thu Mar 29 19:26:17 2012
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -emit-llvm -w -x objective-c %s -o - | FileCheck %s
 // rdar://7095855 rdar://7115749
 
-// CHECK: internal unnamed_addr constant [12 x i8]
+// CHECK: internal unnamed_addr constant [6 x i16] [i16 105, i16 80, i16 111, i16 100, i16 8482, i16 0], align 2
 void *P = @"iPod™";





More information about the cfe-commits mailing list