[cfe-commits] r143418 - in /cfe/trunk/test: CodeGen/string-literal-unicode-conversion.c Lexer/string-literal-encoding.c

Eli Friedman eli.friedman at gmail.com
Mon Oct 31 19:26:36 PDT 2011


Author: efriedma
Date: Mon Oct 31 21:26:36 2011
New Revision: 143418

URL: http://llvm.org/viewvc/llvm-project?rev=143418&view=rev
Log:
Tests for UTF-8 encoding in strings in source code.  Patch by Seth Cantrell.


Added:
    cfe/trunk/test/CodeGen/string-literal-unicode-conversion.c
    cfe/trunk/test/Lexer/string-literal-encoding.c

Added: cfe/trunk/test/CodeGen/string-literal-unicode-conversion.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/string-literal-unicode-conversion.c?rev=143418&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/string-literal-unicode-conversion.c (added)
+++ cfe/trunk/test/CodeGen/string-literal-unicode-conversion.c Mon Oct 31 21:26:36 2011
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
+// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CPP0X %s
+// RUN: %clang_cc1 -x c++ -std=c++0x -fshort-wchar -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=SHORTWCHAR %s
+
+// This file contains a mix of ISO-8859-1 and UTF-8 encoded data.
+// The literal assigned to 'aa' should be the ISO-8859-1 encoding for the code
+// points U+00C0 U+00E9 U+00EE U+00F5 U+00FC
+
+// The rest of the literals should contain the UTF-8 encoding for U+041A U+043E
+// U+0448 U+043A U+0430
+
+#ifndef __cplusplus
+#include <wchar.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+#endif
+void f() {
+  // CHECK-C: private unnamed_addr constant [6 x i8] c"\C0\E9\EE\F5\FC\00", align 1
+  // CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"\C0\E9\EE\F5\FC\00", align 1
+  char const *aa = "Àéîõü";
+  
+  // CHECK-C: private unnamed_addr constant [11 x i8] c"\D0\9A\D0\BE\D1\88\D0\BA\D0\B0\00", align 1
+  // CHECK-CPP0X: private unnamed_addr constant [11 x i8] c"\D0\9A\D0\BE\D1\88\D0\BA\D0\B0\00", align 1
+  char const *a = "Кошка";
+  
+  // CHECK-C: private unnamed_addr constant [6 x i32] [i32 1050, i32 1086, i32 1096, i32 1082, i32 1072, i32 0], align 4
+  // CHECK-SHORTWCHAR: private unnamed_addr constant [6 x i16] [i16 1050, i16 1086, i16 1096, i16 1082, i16 1072, i16 0], align 2
+  // CHECK-CPP0X: private unnamed_addr constant [6 x i32] [i32 1050, i32 1086, i32 1096, i32 1082, i32 1072, i32 0], align 4
+  wchar_t const *b = L"Кошка";
+  
+#if __cplusplus >= 201103L
+  
+  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"1\D0\9A\D0\BE\D1\88\D0\BA\D0\B0\00", align 1
+  char const *c = u8"1Кошка";
+    
+  // CHECK-CPP0X: private unnamed_addr constant [7 x i16] [i16 50, i16 1050, i16 1086, i16 1096, i16 1082, i16 1072, i16 0], align 2
+  char16_t const *e = u"2Кошка";
+  
+  // CHECK-CPP0X: private unnamed_addr constant [7 x i32] [i32 51, i32 1050, i32 1086, i32 1096, i32 1082, i32 1072, i32 0], align 4
+  char32_t const *f = U"3Кошка";
+
+  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"4\D0\9A\D0\BE\D1\88\D0\BA\D0\B0\00", align 1
+  char const *d = u8R"(4Кошка)";
+  
+  // CHECK-CPP0X: private unnamed_addr constant [7 x i16] [i16 53, i16 1050, i16 1086, i16 1096, i16 1082, i16 1072, i16 0], align 2
+  char16_t const *g = uR"(5Кошка)";
+  
+  // CHECK-CPP0X: private unnamed_addr constant [7 x i32] [i32 54, i32 1050, i32 1086, i32 1096, i32 1082, i32 1072, i32 0], align 4
+  char32_t const *h = UR"(6Кошка)";
+  
+  // CHECK-SHORTWCHAR: private unnamed_addr constant [7 x i16] [i16 55, i16 1050, i16 1086, i16 1096, i16 1082, i16 1072, i16 0], align 2
+  // CHECK-CPP0X: private unnamed_addr constant [7 x i32] [i32 55, i32 1050, i32 1086, i32 1096, i32 1082, i32 1072, i32 0], align 4
+  wchar_t const *i = LR"(7Кошка)";
+  
+#endif
+}

Added: cfe/trunk/test/Lexer/string-literal-encoding.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/string-literal-encoding.c?rev=143418&view=auto
==============================================================================
--- cfe/trunk/test/Lexer/string-literal-encoding.c (added)
+++ cfe/trunk/test/Lexer/string-literal-encoding.c Mon Oct 31 21:26:36 2011
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -x c++ -std=c++0x -fsyntax-only -verify %s
+
+// This file should be encoded using ISO-8859-1; the string literals should
+// contain the ISO-8859-1 encoding for the code points U+00C0 U+00E9 U+00EE
+// U+00F5 U+00FC
+
+void f() {
+    wchar_t const *a = L"Àéîõü"; // expected-error {{ illegal sequence in string literal }}
+
+    char16_t const *b = u"Àéîõü"; // expected-error {{ illegal sequence in string literal }}
+    char32_t const *c = U"Àéîõü"; // expected-error {{ illegal sequence in string literal }}
+    wchar_t const *d = LR"(Àéîõü)"; // expected-error {{ illegal sequence in string literal }}
+    char16_t const *e = uR"(Àéîõü)"; // expected-error {{ illegal sequence in string literal }}
+    char32_t const *f = UR"(Àéîõü)"; // expected-error {{ illegal sequence in string literal }}
+}





More information about the cfe-commits mailing list