r282822 - Move UTF functions into namespace llvm.
Justin Lebar via cfe-commits
cfe-commits at lists.llvm.org
Thu Sep 29 17:38:46 PDT 2016
Author: jlebar
Date: Thu Sep 29 19:38:45 2016
New Revision: 282822
URL: http://llvm.org/viewvc/llvm-project?rev=282822&view=rev
Log:
Move UTF functions into namespace llvm.
Summary:
This lets people link against LLVM and their own version of the UTF
library.
I determined this only affects llvm, clang, lld, and lldb by running
$ git grep -wl 'UTF[0-9]\+\|\bConvertUTF\|\bisLegalUTF\|getNumBytesFor' | cut -f 1 -d '/' | sort | uniq
clang
lld
lldb
llvm
Tested with
ninja lldb
ninja check-clang check-llvm check-lld
(ninja check-lldb doesn't complete for me with or without this patch.)
Reviewers: rnk
Subscribers: klimek, beanz, mgorny, llvm-commits
Differential Revision: https://reviews.llvm.org/D24996
Modified:
cfe/trunk/lib/Analysis/FormatString.cpp
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/lib/Format/Encoding.h
cfe/trunk/lib/Frontend/TextDiagnostic.cpp
cfe/trunk/lib/Lex/Lexer.cpp
cfe/trunk/lib/Lex/LiteralSupport.cpp
cfe/trunk/lib/Sema/SemaChecking.cpp
cfe/trunk/lib/Sema/SemaExpr.cpp
Modified: cfe/trunk/lib/Analysis/FormatString.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/FormatString.cpp?rev=282822&r1=282821&r2=282822&view=diff
==============================================================================
--- cfe/trunk/lib/Analysis/FormatString.cpp (original)
+++ cfe/trunk/lib/Analysis/FormatString.cpp Thu Sep 29 19:38:45 2016
@@ -266,14 +266,15 @@ bool clang::analyze_format_string::Parse
if (SpecifierBegin + 1 >= FmtStrEnd)
return false;
- const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
- const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
+ const llvm::UTF8 *SB =
+ reinterpret_cast<const llvm::UTF8 *>(SpecifierBegin + 1);
+ const llvm::UTF8 *SE = reinterpret_cast<const llvm::UTF8 *>(FmtStrEnd);
const char FirstByte = *SB;
// If the invalid specifier is a multibyte UTF-8 string, return the
// total length accordingly so that the conversion specifier can be
// properly updated to reflect a complete UTF-8 specifier.
- unsigned NumBytes = getNumBytesForUTF8(FirstByte);
+ unsigned NumBytes = llvm::getNumBytesForUTF8(FirstByte);
if (NumBytes == 1)
return false;
if (SB + NumBytes > SE)
Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=282822&r1=282821&r2=282822&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Thu Sep 29 19:38:45 2016
@@ -3136,13 +3136,12 @@ GetConstantCFStringEntry(llvm::StringMap
// Otherwise, convert the UTF8 literals into a string of shorts.
IsUTF16 = true;
- SmallVector<UTF16, 128> ToBuf(NumBytes + 1); // +1 for ending nulls.
- const UTF8 *FromPtr = (const UTF8 *)String.data();
- UTF16 *ToPtr = &ToBuf[0];
+ SmallVector<llvm::UTF16, 128> ToBuf(NumBytes + 1); // +1 for ending nulls.
+ const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data();
+ llvm::UTF16 *ToPtr = &ToBuf[0];
- (void)ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
- &ToPtr, ToPtr + NumBytes,
- strictConversion);
+ (void)llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr,
+ ToPtr + NumBytes, llvm::strictConversion);
// ConvertUTF8toUTF16 returns the length in ToPtr.
StringLength = ToPtr - &ToBuf[0];
Modified: cfe/trunk/lib/Format/Encoding.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Encoding.h?rev=282822&r1=282821&r2=282822&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Encoding.h (original)
+++ cfe/trunk/lib/Format/Encoding.h Thu Sep 29 19:38:45 2016
@@ -33,16 +33,17 @@ enum Encoding {
/// \brief Detects encoding of the Text. If the Text can be decoded using UTF-8,
/// it is considered UTF8, otherwise we treat it as some 8-bit encoding.
inline Encoding detectEncoding(StringRef Text) {
- const UTF8 *Ptr = reinterpret_cast<const UTF8 *>(Text.begin());
- const UTF8 *BufEnd = reinterpret_cast<const UTF8 *>(Text.end());
- if (::isLegalUTF8String(&Ptr, BufEnd))
+ const llvm::UTF8 *Ptr = reinterpret_cast<const llvm::UTF8 *>(Text.begin());
+ const llvm::UTF8 *BufEnd = reinterpret_cast<const llvm::UTF8 *>(Text.end());
+ if (llvm::isLegalUTF8String(&Ptr, BufEnd))
return Encoding_UTF8;
return Encoding_Unknown;
}
inline unsigned getCodePointCountUTF8(StringRef Text) {
unsigned CodePoints = 0;
- for (size_t i = 0, e = Text.size(); i < e; i += getNumBytesForUTF8(Text[i])) {
+ for (size_t i = 0, e = Text.size(); i < e;
+ i += llvm::getNumBytesForUTF8(Text[i])) {
++CodePoints;
}
return CodePoints;
@@ -97,7 +98,7 @@ inline unsigned columnWidthWithTabs(Stri
inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {
switch (Encoding) {
case Encoding_UTF8:
- return getNumBytesForUTF8(FirstChar);
+ return llvm::getNumBytesForUTF8(FirstChar);
default:
return 1;
}
@@ -136,7 +137,7 @@ inline unsigned getEscapeSequenceLength(
++I;
return I;
}
- return 1 + getNumBytesForUTF8(Text[1]);
+ return 1 + llvm::getNumBytesForUTF8(Text[1]);
}
}
Modified: cfe/trunk/lib/Frontend/TextDiagnostic.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/TextDiagnostic.cpp?rev=282822&r1=282821&r2=282822&view=diff
==============================================================================
--- cfe/trunk/lib/Frontend/TextDiagnostic.cpp (original)
+++ cfe/trunk/lib/Frontend/TextDiagnostic.cpp Thu Sep 29 19:38:45 2016
@@ -119,16 +119,17 @@ printableTextForNextCharacter(StringRef
begin = reinterpret_cast<unsigned char const *>(&*(SourceLine.begin() + *i));
end = begin + (SourceLine.size() - *i);
- if (isLegalUTF8Sequence(begin, end)) {
- UTF32 c;
- UTF32 *cptr = &c;
+ if (llvm::isLegalUTF8Sequence(begin, end)) {
+ llvm::UTF32 c;
+ llvm::UTF32 *cptr = &c;
unsigned char const *original_begin = begin;
- unsigned char const *cp_end = begin+getNumBytesForUTF8(SourceLine[*i]);
+ unsigned char const *cp_end =
+ begin + llvm::getNumBytesForUTF8(SourceLine[*i]);
- ConversionResult res = ConvertUTF8toUTF32(&begin, cp_end, &cptr, cptr+1,
- strictConversion);
+ llvm::ConversionResult res = llvm::ConvertUTF8toUTF32(
+ &begin, cp_end, &cptr, cptr + 1, llvm::strictConversion);
(void)res;
- assert(conversionOK==res);
+ assert(llvm::conversionOK == res);
assert(0 < begin-original_begin
&& "we must be further along in the string now");
*i += begin-original_begin;
Modified: cfe/trunk/lib/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=282822&r1=282821&r2=282822&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/Lexer.cpp (original)
+++ cfe/trunk/lib/Lex/Lexer.cpp Thu Sep 29 19:38:45 2016
@@ -1485,13 +1485,13 @@ bool Lexer::tryConsumeIdentifierUCN(cons
bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {
const char *UnicodePtr = CurPtr;
- UTF32 CodePoint;
- ConversionResult Result =
- llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr,
- (const UTF8 *)BufferEnd,
+ llvm::UTF32 CodePoint;
+ llvm::ConversionResult Result =
+ llvm::convertUTF8Sequence((const llvm::UTF8 **)&UnicodePtr,
+ (const llvm::UTF8 *)BufferEnd,
&CodePoint,
- strictConversion);
- if (Result != conversionOK ||
+ llvm::strictConversion);
+ if (Result != llvm::conversionOK ||
!isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
return false;
@@ -3625,17 +3625,17 @@ LexNextToken:
break;
}
- UTF32 CodePoint;
+ llvm::UTF32 CodePoint;
// We can't just reset CurPtr to BufferPtr because BufferPtr may point to
// an escaped newline.
--CurPtr;
- ConversionResult Status =
- llvm::convertUTF8Sequence((const UTF8 **)&CurPtr,
- (const UTF8 *)BufferEnd,
+ llvm::ConversionResult Status =
+ llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr,
+ (const llvm::UTF8 *)BufferEnd,
&CodePoint,
- strictConversion);
- if (Status == conversionOK) {
+ llvm::strictConversion);
+ if (Status == llvm::conversionOK) {
if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
return true; // KeepWhitespaceMode
Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=282822&r1=282821&r2=282822&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
+++ cfe/trunk/lib/Lex/LiteralSupport.cpp Thu Sep 29 19:38:45 2016
@@ -402,7 +402,7 @@ static void EncodeUCNEscape(const char *
if (CharByteWidth == 4) {
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
- UTF32 *ResultPtr = reinterpret_cast<UTF32*>(ResultBuf);
+ llvm::UTF32 *ResultPtr = reinterpret_cast<llvm::UTF32*>(ResultBuf);
*ResultPtr = UcnVal;
ResultBuf += 4;
return;
@@ -411,7 +411,7 @@ static void EncodeUCNEscape(const char *
if (CharByteWidth == 2) {
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
- UTF16 *ResultPtr = reinterpret_cast<UTF16*>(ResultBuf);
+ llvm::UTF16 *ResultPtr = reinterpret_cast<llvm::UTF16*>(ResultBuf);
if (UcnVal <= (UTF32)0xFFFF) {
*ResultPtr = UcnVal;
@@ -1114,11 +1114,11 @@ CharLiteralParser::CharLiteralParser(con
char const *tmp_in_start = start;
uint32_t *tmp_out_start = buffer_begin;
- ConversionResult res =
- ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start),
- reinterpret_cast<UTF8 const *>(begin),
- &buffer_begin, buffer_end, strictConversion);
- if (res != conversionOK) {
+ llvm::ConversionResult res =
+ llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),
+ reinterpret_cast<llvm::UTF8 const *>(begin),
+ &buffer_begin, buffer_end, llvm::strictConversion);
+ if (res != llvm::conversionOK) {
// If we see bad encoding for unprefixed character literals, warn and
// simply copy the byte values, for compatibility with gcc and
// older versions of clang.
@@ -1510,13 +1510,13 @@ void StringLiteralParser::init(ArrayRef<
if (CharByteWidth == 4) {
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
- UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultPtr);
+ llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultPtr);
*ResultWidePtr = ResultChar;
ResultPtr += 4;
} else if (CharByteWidth == 2) {
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
- UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultPtr);
+ llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultPtr);
*ResultWidePtr = ResultChar & 0xFFFF;
ResultPtr += 2;
} else {
@@ -1531,12 +1531,12 @@ void StringLiteralParser::init(ArrayRef<
if (CharByteWidth == 4) {
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
- UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultBuf.data());
+ llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultBuf.data());
ResultWidePtr[0] = GetNumStringChars() - 1;
} else if (CharByteWidth == 2) {
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
- UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultBuf.data());
+ llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultBuf.data());
ResultWidePtr[0] = GetNumStringChars() - 1;
} else {
assert(CharByteWidth == 1 && "Unexpected char width");
@@ -1570,7 +1570,7 @@ void StringLiteralParser::init(ArrayRef<
static const char *resyncUTF8(const char *Err, const char *End) {
if (Err == End)
return End;
- End = Err + std::min<unsigned>(getNumBytesForUTF8(*Err), End-Err);
+ End = Err + std::min<unsigned>(llvm::getNumBytesForUTF8(*Err), End-Err);
while (++Err != End && (*Err & 0xC0) == 0x80)
;
return Err;
@@ -1582,7 +1582,7 @@ static const char *resyncUTF8(const char
bool StringLiteralParser::CopyStringFragment(const Token &Tok,
const char *TokBegin,
StringRef Fragment) {
- const UTF8 *ErrorPtrTmp;
+ const llvm::UTF8 *ErrorPtrTmp;
if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
return false;
Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=282822&r1=282821&r2=282822&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Thu Sep 29 19:38:45 2016
@@ -3262,15 +3262,15 @@ bool Sema::CheckObjCString(Expr *Arg) {
if (Literal->containsNonAsciiOrNull()) {
StringRef String = Literal->getString();
unsigned NumBytes = String.size();
- SmallVector<UTF16, 128> ToBuf(NumBytes);
- const UTF8 *FromPtr = (const UTF8 *)String.data();
- UTF16 *ToPtr = &ToBuf[0];
-
- ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
- &ToPtr, ToPtr + NumBytes,
- strictConversion);
+ SmallVector<llvm::UTF16, 128> ToBuf(NumBytes);
+ const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data();
+ llvm::UTF16 *ToPtr = &ToBuf[0];
+
+ llvm::ConversionResult Result =
+ llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr,
+ ToPtr + NumBytes, llvm::strictConversion);
// Check for conversion failure.
- if (Result != conversionOK)
+ if (Result != llvm::conversionOK)
Diag(Arg->getLocStart(),
diag::warn_cfstring_truncated) << Arg->getSourceRange();
}
@@ -4777,16 +4777,16 @@ CheckFormatHandler::HandleInvalidConvers
// hex value.
std::string CodePointStr;
if (!llvm::sys::locale::isPrint(*csStart)) {
- UTF32 CodePoint;
- const UTF8 **B = reinterpret_cast<const UTF8 **>(&csStart);
- const UTF8 *E =
- reinterpret_cast<const UTF8 *>(csStart + csLen);
- ConversionResult Result =
- llvm::convertUTF8Sequence(B, E, &CodePoint, strictConversion);
+ llvm::UTF32 CodePoint;
+ const llvm::UTF8 **B = reinterpret_cast<const llvm::UTF8 **>(&csStart);
+ const llvm::UTF8 *E =
+ reinterpret_cast<const llvm::UTF8 *>(csStart + csLen);
+ llvm::ConversionResult Result =
+ llvm::convertUTF8Sequence(B, E, &CodePoint, llvm::strictConversion);
- if (Result != conversionOK) {
+ if (Result != llvm::conversionOK) {
unsigned char FirstChar = *csStart;
- CodePoint = (UTF32)FirstChar;
+ CodePoint = (llvm::UTF32)FirstChar;
}
llvm::raw_string_ostream OS(CodePointStr);
Modified: cfe/trunk/lib/Sema/SemaExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=282822&r1=282821&r2=282822&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaExpr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaExpr.cpp Thu Sep 29 19:38:45 2016
@@ -3070,8 +3070,9 @@ static void ConvertUTF8ToWideString(unsi
SmallString<32> &Target) {
Target.resize(CharByteWidth * (Source.size() + 1));
char *ResultPtr = &Target[0];
- const UTF8 *ErrorPtr;
- bool success = ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr);
+ const llvm::UTF8 *ErrorPtr;
+ bool success =
+ llvm::ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr);
(void)success;
assert(success);
Target.resize(ResultPtr - &Target[0]);
More information about the cfe-commits mailing list