[clang] modified AST for SEI redemption project (PR #111705)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 9 08:55:58 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Nicholas Reimer (sei-nreimer)
<details>
<summary>Changes</summary>
AST output modifications primarily focused on JSON enhancements for the SEI Redemption project. Some of the key changes are:
1. Recursive Pointer Resolution
- Modified the AST to accurately track multiple levels of pointer indirection during analysis
2. Function Pointer Identification
- Improved function pointer type identification handling, enabling correct resolution and analysis of function pointer assignments and calls within the AST
3. QualType Enhancements
- Updated QualType to better represent types with qualifiers such as `const`, `volatile`, and others
4. QualDetails Addition
- Introduced a new structure, QualDetails, to encapsulate additional metadata about type qualifiers, e.g., `ptr`, `signed`, `float`, `struct`, `union`, `array`, `promotable`, `integer`, `func_ptr`
5. Return Type Information
- Updated the AST to expose detailed return type information, similar to VarDecl
6. JSON Debloating
- Reduced the size of JSON output by caching IDs and then using the `refID` key for referring back to the original ID
7. Added Missing Range and ID for CXXCtorInitializer
### Examples
```c
// recursive pointer resolution
int ** c;
```
![image](https://github.com/user-attachments/assets/e753bacb-67ec-47ae-b7a3-a24096cf6f31)
```c
// function pointer information
long (*foo)(int (*)(short));
```
![image](https://github.com/user-attachments/assets/e97abf8e-4acc-4716-8f34-2e0a80d0d94c)
```c
// return type information
int runFunctionTestA( char a );
```
![image](https://github.com/user-attachments/assets/bc348133-a778-4048-8eea-f81991c3e585)
```c
// refID usage example.
int a; // (not in image) first time encountering an int
int *b; // (not in image) first time encountering int *, but second time encountering int
int **c; // first time encountering int **, but second time encountering int * and third time int
```
![image](https://github.com/user-attachments/assets/77f7f6d0-be66-4618-9155-0bc7c8f3ff34)
---
Full diff: https://github.com/llvm/llvm-project/pull/111705.diff
6 Files Affected:
- (modified) clang/include/clang/AST/ASTNodeTraverser.h (+55-4)
- (modified) clang/include/clang/AST/JSONNodeDumper.h (+43-2)
- (modified) clang/include/clang/AST/TextNodeDumper.h (+22)
- (modified) clang/lib/AST/JSONNodeDumper.cpp (+120-4)
- (modified) clang/lib/AST/TextNodeDumper.cpp (+8)
- (modified) clang/unittests/AST/ASTTraverserTest.cpp (+4)
``````````diff
diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h
index a443a88bab1f2d..3299011771f332 100644
--- a/clang/include/clang/AST/ASTNodeTraverser.h
+++ b/clang/include/clang/AST/ASTNodeTraverser.h
@@ -11,6 +11,22 @@
// similar to RecursiveASTVisitor.
//
//===----------------------------------------------------------------------===//
+//
+// Modifications to this file by SEI staff are copyright Carnegie Mellon
+// University and contributed under the Apache License v2.0 with LLVM
+// Exceptions.
+//
+// SEI Contributions are made with funding and support from the Department of
+// Defense under Contract No. FA8702-15-D-0002 with Carnegie Mellon University
+// for the operation of the Software Engineering Institute, a federally funded
+// research and development center.
+//
+// The view, opinions, and/or findings contained in this material are those of
+// the author(s) and should not be construed as an official Government position,
+// policy, or decision, unless designated by other documentation.
+// DM24-0194
+//
+//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_ASTNODETRAVERSER_H
#define LLVM_CLANG_AST_ASTNODETRAVERSER_H
@@ -177,14 +193,34 @@ class ASTNodeTraverser
if (!SQT.Quals.hasQualifiers())
return Visit(SQT.Ty);
- getNodeDelegate().AddChild([=] {
+ // SEI: changed from default label to "qualTypeDetail"
+ getNodeDelegate().AddChild("qualTypeDetail", [this, T] {
getNodeDelegate().Visit(T);
Visit(T.split().Ty);
});
+
+ // SEI function pointer support. this gets called whenever the three
+ // conditions are met:
+ // 1. the function pointer is not typedef'd
+ // 2. after Visit(VarDecl *) gets called
+ // 3. if VarDecl determines this is a function pointer
+ if (T->isFunctionPointerType()) {
+ // create as a child node to this type
+ getNodeDelegate().AddChild(
+ [=] { getNodeDelegate().Visit(T->getPointeeType()); });
+ }
+
+ // SEI: traverse PointerType information
+ if (T->isPointerType())
+ Visit(T->getPointeeType());
}
+ // SEI: traverse ReturnType information
+ void VisitReturnType(QualType T) { getNodeDelegate().VisitReturnType(T); }
+
void Visit(const Type *T) {
- getNodeDelegate().AddChild([=] {
+ // SEI: renamed this from default label
+ getNodeDelegate().AddChild("typeDetails", [this, T] {
getNodeDelegate().Visit(T);
if (!T)
return;
@@ -209,7 +245,8 @@ class ASTNodeTraverser
}
void Visit(const Attr *A) {
- getNodeDelegate().AddChild([=] {
+ // SEI: renamed from default label
+ getNodeDelegate().AddChild("attrDetails", [this, A] {
getNodeDelegate().Visit(A);
ConstAttrVisitor<Derived>::Visit(A);
});
@@ -410,8 +447,17 @@ class ASTNodeTraverser
Visit(T->getSizeExpr());
}
void VisitVectorType(const VectorType *T) { Visit(T->getElementType()); }
- void VisitFunctionType(const FunctionType *T) { Visit(T->getReturnType()); }
+ void VisitFunctionType(const FunctionType *T) {
+ // SEI: add functionDetails, incl. return type
+ getNodeDelegate().AddChild("functionDetails", [this, T] {
+ getNodeDelegate().VisitFunctionType(T);
+ getNodeDelegate().VisitReturnType(T->getReturnType());
+ });
+ }
+
void VisitFunctionProtoType(const FunctionProtoType *T) {
+
+ // SEI: visit the function type. this will force the return type info too.
VisitFunctionType(T);
for (const QualType &PT : T->getParamTypes())
Visit(PT);
@@ -560,6 +606,11 @@ class ASTNodeTraverser
Visit(TSI->getTypeLoc());
if (D->hasInit())
Visit(D->getInit());
+
+ // SEI: if this is a function pointer, then we need to get the
+ // FunctionProtoType and then make add'l visits. if the FP is typedef'd,
+ // then this behavior occurs for us outside of Visit(VarDecl *)
+ getNodeDelegate().Visit(D->getType());
}
void VisitDecompositionDecl(const DecompositionDecl *D) {
diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h
index 9422c8fceccfbd..25cec3abcbae82 100644
--- a/clang/include/clang/AST/JSONNodeDumper.h
+++ b/clang/include/clang/AST/JSONNodeDumper.h
@@ -10,6 +10,22 @@
// a JSON.
//
//===----------------------------------------------------------------------===//
+//
+// Modifications to this file by SEI staff are copyright Carnegie Mellon
+// University and contributed under the Apache License v2.0 with LLVM
+// Exceptions.
+//
+// SEI Contributions are made with funding and support from the Department of
+// Defense under Contract No. FA8702-15-D-0002 with Carnegie Mellon University
+// for the operation of the Software Engineering Institute, a federally funded
+// research and development center.
+//
+// The view, opinions, and/or findings contained in this material are those of
+// the author(s) and should not be construed as an official Government position,
+// policy, or decision, unless designated by other documentation.
+// DM24-0194
+//
+//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_JSONNODEDUMPER_H
#define LLVM_CLANG_AST_JSONNODEDUMPER_H
@@ -26,6 +42,9 @@
#include "clang/AST/Type.h"
#include "llvm/Support/JSON.h"
+// SEI: added for caching addresses of certain visited nodes
+#include <unordered_set>
+
namespace clang {
class APValue;
@@ -111,8 +130,8 @@ class NodeStreamer {
// Dumps AST nodes in JSON format. There is no implied stability for the
// content or format of the dump between major releases of Clang, other than it
// being valid JSON output. Further, there is no requirement that the
-// information dumped is a complete representation of the AST, only that the
-// information presented is correct.
+// information dumped be a complete representation of the AST, only that the
+// information presented be correct.
class JSONNodeDumper
: public ConstAttrVisitor<JSONNodeDumper>,
public comments::ConstCommentVisitor<JSONNodeDumper, void,
@@ -132,6 +151,9 @@ class JSONNodeDumper
StringRef LastLocFilename, LastLocPresumedFilename;
unsigned LastLocLine, LastLocPresumedLine;
+ // SEI: caches addresses for QualType nodes that are duplicates
+ std::unordered_set<void *> AddressCache;
+
using InnerAttrVisitor = ConstAttrVisitor<JSONNodeDumper>;
using InnerCommentVisitor =
comments::ConstCommentVisitor<JSONNodeDumper, void,
@@ -184,6 +206,18 @@ class JSONNodeDumper
StringRef getCommentCommandName(unsigned CommandID) const;
+ /// SEI: simple cacher for addresses of nodes to reduce
+ /// bloat caused by SEI changes
+ /// Returns true if it's already cached, otherwise false
+ bool cacheAddress(void *p) {
+ if (AddressCache.find(p) == AddressCache.end()) {
+ AddressCache.insert(p);
+ return false;
+ }
+
+ return true;
+ }
+
public:
JSONNodeDumper(raw_ostream &OS, const SourceManager &SrcMgr, ASTContext &Ctx,
const PrintingPolicy &PrintPolicy,
@@ -196,6 +230,13 @@ class JSONNodeDumper
void Visit(const Stmt *Node);
void Visit(const Type *T);
void Visit(QualType T);
+
+ // SEI: get specific details from the qual type
+ void VisitQualTypeDetails(QualType T);
+
+ // SEI: traverse ReturnType information
+ void VisitReturnType(QualType T);
+
void Visit(const Decl *D);
void Visit(TypeLoc TL);
diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h
index 9c320c8ae3e54c..89f0fb110b31a2 100644
--- a/clang/include/clang/AST/TextNodeDumper.h
+++ b/clang/include/clang/AST/TextNodeDumper.h
@@ -9,6 +9,22 @@
// This file implements AST dumping of components of individual AST nodes.
//
//===----------------------------------------------------------------------===//
+//
+// Modifications to this file by SEI staff are copyright Carnegie Mellon
+// University and contributed under the Apache License v2.0 with LLVM
+// Exceptions.
+//
+// SEI Contributions are made with funding and support from the Department of
+// Defense under Contract No. FA8702-15-D-0002 with Carnegie Mellon University
+// for the operation of the Software Engineering Institute, a federally funded
+// research and development center.
+//
+// The view, opinions, and/or findings contained in this material are those of
+// the author(s) and should not be construed as an official Government position,
+// policy, or decision, unless designated by other documentation.
+// DM24-0194
+//
+//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_TEXTNODEDUMPER_H
#define LLVM_CLANG_AST_TEXTNODEDUMPER_H
@@ -183,6 +199,12 @@ class TextNodeDumper
void Visit(TypeLoc);
+ // SEI: added support for getting ReturnType information
+ void VisitReturnType(QualType T);
+
+ // SEI: added support for more QT details. it's a passthrough for this class
+ void VisitQualTypeDetails(QualType T) {}
+
void Visit(const Decl *D);
void Visit(const CXXCtorInitializer *Init);
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index ddbe2136a671f3..72b2c2b9b8aa80 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -71,6 +71,19 @@ void JSONNodeDumper::Visit(const Stmt *S) {
}
void JSONNodeDumper::Visit(const Type *T) {
+ // SEI: ensure FPTs are debloated. this can be expanded to ALL types, if
+ // desired
+ if (T && cacheAddress((void *)T)) {
+ // add this as a child to know that it's a Type
+ AddChild("typeDetails",
+ [=] { JOS.attribute("refId", createPointerRepresentation(T)); });
+
+ InnerTypeVisitor::Visit(T);
+ // SEI
+ VisitQualTypeDetails(T->getCanonicalTypeInternal());
+ return;
+ }
+
JOS.attribute("id", createPointerRepresentation(T));
if (!T)
@@ -87,13 +100,44 @@ void JSONNodeDumper::Visit(const Type *T) {
T->containsUnexpandedParameterPack());
attributeOnlyIfTrue("isImported", T->isFromAST());
InnerTypeVisitor::Visit(T);
+ // SEI
+ VisitQualTypeDetails(T->getCanonicalTypeInternal());
}
void JSONNodeDumper::Visit(QualType T) {
- JOS.attribute("id", createPointerRepresentation(T.getAsOpaquePtr()));
- JOS.attribute("kind", "QualType");
- JOS.attribute("type", createQualType(T));
- JOS.attribute("qualifiers", T.split().Quals.getAsString());
+
+ // SEI: used AddChild to prevent qualType from being added to a list
+ // JOS.attributeArray("qualTypes", [=] {
+
+ // SEI: force qualType into its own block, otherwise multiple Visits
+ // create a bunch of siblings, which is invalid JSON
+ JOS.attributeBegin("qualType");
+ JOS.objectBegin();
+
+ // SEI: cache visited addresses and add only its refId
+ // instead of the kind, type, quals, but leave the qual type details
+ // because those can differ among IDs
+ if (cacheAddress(T.getAsOpaquePtr())) {
+ JOS.attribute("refId", createPointerRepresentation(T.getAsOpaquePtr()));
+ } else {
+ JOS.attribute("id", createPointerRepresentation(T.getAsOpaquePtr()));
+ JOS.attribute("kind", "QualType");
+ JOS.attribute("type", createQualType(T));
+ JOS.attribute("qualifiers", T.split().Quals.getAsString());
+ }
+
+ // SEI: get add'l info required for redemption analysis
+ // the qual type details differ even among cached references
+ VisitQualTypeDetails(T);
+
+ // SEI: if this is a pointer type, then recursively call ourselves
+ // until it's not
+ if (T->isPointerType())
+ Visit(T->getPointeeType());
+
+ JOS.objectEnd();
+ JOS.attributeEnd();
+ //} );
}
void JSONNodeDumper::Visit(TypeLoc TL) {
@@ -111,6 +155,64 @@ void JSONNodeDumper::Visit(TypeLoc TL) {
[TL, this] { writeSourceRange(TL.getSourceRange()); });
}
+void JSONNodeDumper::VisitQualTypeDetails(QualType T) {
+ // SEI: get more detailed info on type. this info is not transferrable
+ // with the refId, so this must be called on every type even if that type
+ // has been cached
+ JOS.attributeBegin("qualDetails");
+ JOS.arrayBegin();
+
+ auto CT = T->getCanonicalTypeInternal();
+
+ if (CT->isStructureType())
+ JOS.value("struct");
+
+ if (CT->isNullPtrType())
+ JOS.value("null");
+ if (CT->isUndeducedType())
+ JOS.value("undeduced");
+
+ if (CT->isPointerType())
+ JOS.value("ptr");
+ if (CT->isVoidType())
+ JOS.value("void");
+
+ if (CT->isSignedIntegerType())
+ JOS.value("signed");
+ if (CT->isUnsignedIntegerType())
+ JOS.value("unsigned");
+ if (CT->isIntegerType())
+ JOS.value("integer");
+ if (CT->isFloatingType())
+ JOS.value("fpp");
+ if (CT->isEnumeralType())
+ JOS.value("enum");
+ if (CT->isUnionType())
+ JOS.value("union");
+ if (CT->isFunctionPointerType())
+ JOS.value("func_ptr");
+ if (CT->isTypedefNameType())
+ JOS.value("type_def");
+ if (CT->isArrayType())
+ JOS.value("array");
+
+ JOS.arrayEnd();
+ JOS.attributeEnd();
+}
+
+// SEI: capture the return info in a nested JSON block
+void JSONNodeDumper::VisitReturnType(QualType T) {
+ // using this function allows us to easily wrap just the returnType
+ // section into its own JSON block. if we do this in ASTNodeTraverser,
+ // then the TextNodeDumper works as expected but the JSONNodeDumper
+ // rolls all siblings into the returnType node with those siblings as child
+ // nodes
+
+ JOS.attributeObject("returnTypeDetail", [=] { Visit(T); });
+
+ // Visit(T);
+}
+
void JSONNodeDumper::Visit(const Decl *D) {
JOS.attribute("id", createPointerRepresentation(D));
@@ -175,6 +277,14 @@ void JSONNodeDumper::Visit(const TemplateArgument &TA, SourceRange R,
void JSONNodeDumper::Visit(const CXXCtorInitializer *Init) {
JOS.attribute("kind", "CXXCtorInitializer");
+
+ // SEI: added id for CXXCtorInitializers
+ JOS.attribute("id", createPointerRepresentation(Init));
+
+ // SEI: added range for CXXCtorInitializers
+ JOS.attributeObject(
+ "range", [Init, this] { writeSourceRange(Init->getSourceRange()); });
+
if (Init->isAnyMemberInitializer())
JOS.attribute("anyInit", createBareDeclRef(Init->getAnyMember()));
else if (Init->isBaseInitializer())
@@ -958,6 +1068,10 @@ void JSONNodeDumper::VisitFieldDecl(const FieldDecl *FD) {
attributeOnlyIfTrue("modulePrivate", FD->isModulePrivate());
attributeOnlyIfTrue("isBitfield", FD->isBitField());
attributeOnlyIfTrue("hasInClassInitializer", FD->hasInClassInitializer());
+
+ // SEI: had to add this in b/c FieldDecls do not seem to call
+ // Visit(QualType)
+ Visit(FD->getType());
}
void JSONNodeDumper::VisitFunctionDecl(const FunctionDecl *FD) {
@@ -1346,6 +1460,8 @@ void JSONNodeDumper::VisitDeclRefExpr(const DeclRefExpr *DRE) {
case NOUR_Discarded: JOS.attribute("nonOdrUseReason", "discarded"); break;
}
attributeOnlyIfTrue("isImmediateEscalating", DRE->isImmediateEscalating());
+ // SEI: this doesn't call VisitNamedDecl, so we force it
+ Visit(DRE->getType());
}
void JSONNodeDumper::VisitSYCLUniqueStableNameExpr(
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 15b23d60c3ffab..7d87c4d1b5eeb8 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -241,6 +241,14 @@ void TextNodeDumper::Visit(QualType T) {
OS << " " << T.split().Quals.getAsString();
}
+void TextNodeDumper::VisitReturnType(QualType T) {
+ OS << "ReturnType";
+ dumpPointer(T.getAsOpaquePtr());
+ OS << " ";
+ dumpBareType(T, false);
+ OS << " " << T.split().Quals.getAsString();
+}
+
void TextNodeDumper::Visit(TypeLoc TL) {
if (!TL) {
ColorScope Color(OS, ShowColors, NullColor);
diff --git a/clang/unittests/AST/ASTTraverserTest.cpp b/clang/unittests/AST/ASTTraverserTest.cpp
index 8b6e3e90c0ea67..5f10b69862171a 100644
--- a/clang/unittests/AST/ASTTraverserTest.cpp
+++ b/clang/unittests/AST/ASTTraverserTest.cpp
@@ -89,6 +89,10 @@ class NodeTreePrinter : public TextTreeStructure {
}
}
+ // SEI: added for class completeness
+ void VisitFunctionType(const FunctionType *T) {}
+ void VisitReturnType(QualType T) {}
+
template <typename... T> void Visit(T...) {}
};
``````````
</details>
https://github.com/llvm/llvm-project/pull/111705
More information about the cfe-commits
mailing list