[cfe-commits] r160886 - in /cfe/trunk: include/clang/Analysis/Analyses/FormatString.h lib/Analysis/FormatString.cpp lib/Analysis/PrintfFormatString.cpp lib/Analysis/ScanfFormatString.cpp test/Sema/format-strings-fixit.c

David Blaikie dblaikie at gmail.com
Sat Jul 28 08:14:01 PDT 2012


On Sat, Jul 28, 2012 at 5:51 AM, Matthieu Monrocq
<matthieu.monrocq at gmail.com> wrote:
>
>
> On Fri, Jul 27, 2012 at 9:17 PM, Hans Wennborg <hans at hanshq.net> wrote:
>>
>> Author: hans
>> Date: Fri Jul 27 14:17:46 2012
>> New Revision: 160886
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=160886&view=rev
>> Log:
>> Make -Wformat walk the typedef chain when looking for size_t, etc.
>>
>> Clang's -Wformat fix-its currently suggest using "%zu" for values of
>> type size_t (in C99 or C++11 mode). However, for a type such as
>> std::vector<T>::size_type, it does not notice that type is actually
>> typedeffed to size_t, and instead suggests a format for the underlying
>> type, such as "%lu" or "%u".
>>
>> This commit makes the format string fix mechanism walk the typedef chain
>> so that it notices if the type is size_t, even if that isn't "at the
>> top".
>>
>> Modified:
>>     cfe/trunk/include/clang/Analysis/Analyses/FormatString.h
>>     cfe/trunk/lib/Analysis/FormatString.cpp
>>     cfe/trunk/lib/Analysis/PrintfFormatString.cpp
>>     cfe/trunk/lib/Analysis/ScanfFormatString.cpp
>>     cfe/trunk/test/Sema/format-strings-fixit.c
>>
>> Modified: cfe/trunk/include/clang/Analysis/Analyses/FormatString.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Analysis/Analyses/FormatString.h?rev=160886&r1=160885&r2=160886&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/include/clang/Analysis/Analyses/FormatString.h (original)
>> +++ cfe/trunk/include/clang/Analysis/Analyses/FormatString.h Fri Jul 27
>> 14:17:46 2012
>> @@ -355,6 +355,10 @@
>>    bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
>>
>>    bool hasStandardLengthConversionCombination() const;
>> +
>> +  /// For a TypedefType QT, if it is a named integer type such as size_t,
>> +  /// assign the appropriate value to LM and return true.
>> +  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
>>  };
>>
>>  } // end analyze_format_string namespace
>>
>> Modified: cfe/trunk/lib/Analysis/FormatString.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/FormatString.cpp?rev=160886&r1=160885&r2=160886&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/Analysis/FormatString.cpp (original)
>> +++ cfe/trunk/lib/Analysis/FormatString.cpp Fri Jul 27 14:17:46 2012
>> @@ -681,3 +681,37 @@
>>    }
>>    return true;
>>  }
>> +
>> +bool FormatSpecifier::namedTypeToLengthModifier(QualType QT,
>> +                                                LengthModifier &LM) {
>> +  assert(isa<TypedefType>(QT) && "Expected a TypedefType");
>> +  const TypedefNameDecl *Typedef = cast<TypedefType>(QT)->getDecl();
>> +
>> +  for (;;) {
>> +    const IdentifierInfo *Identifier = Typedef->getIdentifier();
>> +    if (Identifier->getName() == "size_t") {
>> +      LM.setKind(LengthModifier::AsSizeT);
>> +      return true;
>> +    } else if (Identifier->getName() == "ssize_t") {
>> +      // Not C99, but common in Unix.
>> +      LM.setKind(LengthModifier::AsSizeT);
>> +      return true;
>> +    } else if (Identifier->getName() == "intmax_t") {
>> +      LM.setKind(LengthModifier::AsIntMax);
>> +      return true;
>> +    } else if (Identifier->getName() == "uintmax_t") {
>> +      LM.setKind(LengthModifier::AsIntMax);
>> +      return true;
>> +    } else if (Identifier->getName() == "ptrdiff_t") {
>> +      LM.setKind(LengthModifier::AsPtrDiff);
>> +      return true;
>> +    }
>> +
>> +    QualType T = Typedef->getUnderlyingType();
>> +    if (!isa<TypedefType>(T))
>> +      break;
>> +
>> +    Typedef = cast<TypedefType>(T)->getDecl();
>> +  }
>> +  return false;
>> +}
>>
>> Modified: cfe/trunk/lib/Analysis/PrintfFormatString.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/PrintfFormatString.cpp?rev=160886&r1=160885&r2=160886&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/Analysis/PrintfFormatString.cpp (original)
>> +++ cfe/trunk/lib/Analysis/PrintfFormatString.cpp Fri Jul 27 14:17:46 2012
>> @@ -447,21 +447,8 @@
>>    }
>>
>>    // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers
>> in C99.
>> -  if (isa<TypedefType>(QT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
>> -    const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
>> -    if (Identifier->getName() == "size_t") {
>> -      LM.setKind(LengthModifier::AsSizeT);
>> -    } else if (Identifier->getName() == "ssize_t") {
>> -      // Not C99, but common in Unix.
>> -      LM.setKind(LengthModifier::AsSizeT);
>> -    } else if (Identifier->getName() == "intmax_t") {
>> -      LM.setKind(LengthModifier::AsIntMax);
>> -    } else if (Identifier->getName() == "uintmax_t") {
>> -      LM.setKind(LengthModifier::AsIntMax);
>> -    } else if (Identifier->getName() == "ptrdiff_t") {
>> -      LM.setKind(LengthModifier::AsPtrDiff);
>> -    }
>> -  }
>> +  if (isa<TypedefType>(QT) && (LangOpt.C99 || LangOpt.CPlusPlus0x))
>> +    namedTypeToLengthModifier(QT, LM);
>>
>>    // If fixing the length modifier was enough, we are done.
>>    const analyze_printf::ArgTypeResult &ATR = getArgType(Ctx,
>> IsObjCLiteral);
>>
>> Modified: cfe/trunk/lib/Analysis/ScanfFormatString.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/ScanfFormatString.cpp?rev=160886&r1=160885&r2=160886&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/Analysis/ScanfFormatString.cpp (original)
>> +++ cfe/trunk/lib/Analysis/ScanfFormatString.cpp Fri Jul 27 14:17:46 2012
>> @@ -382,21 +382,8 @@
>>    }
>>
>>    // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers
>> in C99.
>> -  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
>> -    const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
>> -    if (Identifier->getName() == "size_t") {
>> -      LM.setKind(LengthModifier::AsSizeT);
>> -    } else if (Identifier->getName() == "ssize_t") {
>> -      // Not C99, but common in Unix.
>> -      LM.setKind(LengthModifier::AsSizeT);
>> -    } else if (Identifier->getName() == "intmax_t") {
>> -      LM.setKind(LengthModifier::AsIntMax);
>> -    } else if (Identifier->getName() == "uintmax_t") {
>> -      LM.setKind(LengthModifier::AsIntMax);
>> -    } else if (Identifier->getName() == "ptrdiff_t") {
>> -      LM.setKind(LengthModifier::AsPtrDiff);
>> -    }
>> -  }
>> +  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x))
>> +    namedTypeToLengthModifier(PT, LM);
>>
>>    // If fixing the length modifier was enough, we are done.
>>    const analyze_scanf::ScanfArgTypeResult &ATR = getArgType(Ctx);
>>
>> Modified: cfe/trunk/test/Sema/format-strings-fixit.c
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/format-strings-fixit.c?rev=160886&r1=160885&r2=160886&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/test/Sema/format-strings-fixit.c (original)
>> +++ cfe/trunk/test/Sema/format-strings-fixit.c Fri Jul 27 14:17:46 2012
>> @@ -64,6 +64,18 @@
>>    printf("%f", (uintmax_t) 42);
>>    printf("%f", (ptrdiff_t) 42);
>>
>> +  // Look beyond the first typedef.
>> +  typedef size_t my_size_type;
>> +  typedef intmax_t my_intmax_type;
>> +  typedef uintmax_t my_uintmax_type;
>> +  typedef ptrdiff_t my_ptrdiff_type;
>> +  typedef int my_int_type;
>> +  printf("%f", (my_size_type) 42);
>> +  printf("%f", (my_intmax_type) 42);
>> +  printf("%f", (my_uintmax_type) 42);
>> +  printf("%f", (my_ptrdiff_type) 42);
>> +  printf("%f", (my_int_type) 42);
>> +
>>    // string
>>    printf("%ld", "foo");
>>
>> @@ -122,6 +134,18 @@
>>    scanf("%f", &uIntmaxVar);
>>    scanf("%f", &ptrdiffVar);
>>
>> +  // Look beyond the first typedef for named integer types.
>> +  typedef size_t my_size_type;
>> +  typedef intmax_t my_intmax_type;
>> +  typedef uintmax_t my_uintmax_type;
>> +  typedef ptrdiff_t my_ptrdiff_type;
>> +  typedef int my_int_type;
>> +  scanf("%f", (my_size_type*)&sizeVar);
>> +  scanf("%f", (my_intmax_type*)&intmaxVar);
>> +  scanf("%f", (my_uintmax_type*)&uIntmaxVar);
>> +  scanf("%f", (my_ptrdiff_type*)&ptrdiffVar);
>> +  scanf("%f", (my_int_type*)&intVar);
>> +
>>    // Preserve the original formatting.
>>    scanf("%o", &longVar);
>>    scanf("%u", &longVar);
>> @@ -162,6 +186,11 @@
>>  // CHECK: printf("%jd", (intmax_t) 42);
>>  // CHECK: printf("%ju", (uintmax_t) 42);
>>  // CHECK: printf("%td", (ptrdiff_t) 42);
>> +// CHECK: printf("%zu", (my_size_type) 42);
>> +// CHECK: printf("%jd", (my_intmax_type) 42);
>> +// CHECK: printf("%ju", (my_uintmax_type) 42);
>> +// CHECK: printf("%td", (my_ptrdiff_type) 42);
>> +// CHECK: printf("%d", (my_int_type) 42);
>>  // CHECK: printf("%s", "foo");
>>  // CHECK: printf("%lo", (long) 42);
>>  // CHECK: printf("%lu", (long) 42);
>> @@ -193,6 +222,11 @@
>>  // CHECK: scanf("%jd", &intmaxVar);
>>  // CHECK: scanf("%ju", &uIntmaxVar);
>>  // CHECK: scanf("%td", &ptrdiffVar);
>> +// CHECK: scanf("%zu", (my_size_type*)&sizeVar);
>> +// CHECK: scanf("%jd", (my_intmax_type*)&intmaxVar);
>> +// CHECK: scanf("%ju", (my_uintmax_type*)&uIntmaxVar);
>> +// CHECK: scanf("%td", (my_ptrdiff_type*)&ptrdiffVar);
>> +// CHECK: scanf("%d", (my_int_type*)&intVar);
>>  // CHECK: scanf("%lo", &longVar);
>>  // CHECK: scanf("%lu", &longVar);
>>  // CHECK: scanf("%lx", &longVar);
>>
>>
>
> Hi Hans,
>
> I wonder why it would be necessary to "know" that the
> std::vector<...>::size_type is a size_t instead of being a long unsigned
> int. The very purpose of the typedef to `size_type` in the first place is to
> abstract the underlying type, and different STL implementations are free to
> alias it differently as far as I know, so...
>
> ... could you explain why it is important for you ?

I'd hazard a guess that it's so we don't have to special case for the
STL container names (& just assuming any type called "size_type" is
going to be arch-size-dependent might hit a lot of cases where the
name is used but that property is not true (in user code, etc)).
Finding a general mechanism where the STL case falls out is usually
preferable - oh, and it works the other way too. Now if a user writes
code with their own typedef of size_t that happens to have some other
spelling we've never seen before, we'll still issue the right
suggestion.

- David



More information about the cfe-commits mailing list