[clang] modified AST for SEI redemption project (PR #111705)

via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 9 08:55:58 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Nicholas Reimer (sei-nreimer)

<details>
<summary>Changes</summary>

AST output modifications primarily focused on JSON enhancements for the SEI Redemption project. Some of the key changes are:

1. Recursive Pointer Resolution
- Modified the AST to accurately track multiple levels of pointer indirection during analysis

2. Function Pointer Identification
- Improved function pointer type identification handling, enabling correct resolution and analysis of function pointer assignments and calls within the AST

3. QualType Enhancements
- Updated QualType to better represent types with qualifiers such as `const`, `volatile`, and others

4. QualDetails Addition
- Introduced a new structure, QualDetails, to encapsulate additional metadata about type qualifiers, e.g., `ptr`, `signed`, `float`, `struct`, `union`, `array`, `promotable`, `integer`, `func_ptr`

5. Return Type Information
- Updated the AST to expose detailed return type information, similar to VarDecl

6. JSON Debloating
- Reduced the size of the JSON output by caching the IDs of nodes that have already been emitted and using the `refId` key to refer back to the original ID

7. Added Missing Range and ID for CXXCtorInitializer

### Examples

```c
// recursive pointer resolution
int ** c;
```
![image](https://github.com/user-attachments/assets/e753bacb-67ec-47ae-b7a3-a24096cf6f31)

```c
// function pointer information
long (*foo)(int (*)(short));
```
![image](https://github.com/user-attachments/assets/e97abf8e-4acc-4716-8f34-2e0a80d0d94c)

```c
// return type information
int runFunctionTestA( char a );
```
![image](https://github.com/user-attachments/assets/bc348133-a778-4048-8eea-f81991c3e585)

```c
// refId usage example.
int a; // (not in image) first time encountering an int
int *b; // (not in image) first time encountering int *, but second time encountering int
int **c;  // first time encountering int **, but second time encountering int * and third time int
```
![image](https://github.com/user-attachments/assets/77f7f6d0-be66-4618-9155-0bc7c8f3ff34)



---
Full diff: https://github.com/llvm/llvm-project/pull/111705.diff


6 Files Affected:

- (modified) clang/include/clang/AST/ASTNodeTraverser.h (+55-4) 
- (modified) clang/include/clang/AST/JSONNodeDumper.h (+43-2) 
- (modified) clang/include/clang/AST/TextNodeDumper.h (+22) 
- (modified) clang/lib/AST/JSONNodeDumper.cpp (+120-4) 
- (modified) clang/lib/AST/TextNodeDumper.cpp (+8) 
- (modified) clang/unittests/AST/ASTTraverserTest.cpp (+4) 


``````````diff
diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h
index a443a88bab1f2d..3299011771f332 100644
--- a/clang/include/clang/AST/ASTNodeTraverser.h
+++ b/clang/include/clang/AST/ASTNodeTraverser.h
@@ -11,6 +11,22 @@
 // similar to RecursiveASTVisitor.
 //
 //===----------------------------------------------------------------------===//
+//
+// Modifications to this file by SEI staff are copyright Carnegie Mellon
+// University and contributed under the Apache License v2.0 with LLVM
+// Exceptions.
+//
+// SEI Contributions are made with funding and support from the Department of
+// Defense under Contract No. FA8702-15-D-0002 with Carnegie Mellon University
+// for the operation of the Software Engineering Institute, a federally funded
+// research and development center.
+//
+// The view, opinions, and/or findings contained in this material are those of
+// the author(s) and should not be construed as an official Government position,
+// policy, or decision, unless designated by other documentation.
+// DM24-0194
+//
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_AST_ASTNODETRAVERSER_H
 #define LLVM_CLANG_AST_ASTNODETRAVERSER_H
@@ -177,14 +193,34 @@ class ASTNodeTraverser
     if (!SQT.Quals.hasQualifiers())
       return Visit(SQT.Ty);
 
-    getNodeDelegate().AddChild([=] {
+    // SEI: changed from default label to "qualTypeDetail"
+    getNodeDelegate().AddChild("qualTypeDetail", [this, T] {
       getNodeDelegate().Visit(T);
       Visit(T.split().Ty);
     });
+
+    // SEI function pointer support. this gets called whenever the three
+    // conditions are met:
+    // 1. the function pointer is not typedef'd
+    // 2. after Visit(VarDecl *) gets called
+    // 3. if VarDecl determines this is a function pointer
+    if (T->isFunctionPointerType()) {
+      // create as a child node to this type
+      getNodeDelegate().AddChild(
+          [=] { getNodeDelegate().Visit(T->getPointeeType()); });
+    }
+
+    // SEI: traverse PointerType information
+    if (T->isPointerType())
+      Visit(T->getPointeeType());
   }
 
+  // SEI: traverse ReturnType information
+  void VisitReturnType(QualType T) { getNodeDelegate().VisitReturnType(T); }
+
   void Visit(const Type *T) {
-    getNodeDelegate().AddChild([=] {
+    // SEI: renamed this from default label
+    getNodeDelegate().AddChild("typeDetails", [this, T] {
       getNodeDelegate().Visit(T);
       if (!T)
         return;
@@ -209,7 +245,8 @@ class ASTNodeTraverser
   }
 
   void Visit(const Attr *A) {
-    getNodeDelegate().AddChild([=] {
+    // SEI: renamed from default label
+    getNodeDelegate().AddChild("attrDetails", [this, A] {
       getNodeDelegate().Visit(A);
       ConstAttrVisitor<Derived>::Visit(A);
     });
@@ -410,8 +447,17 @@ class ASTNodeTraverser
     Visit(T->getSizeExpr());
   }
   void VisitVectorType(const VectorType *T) { Visit(T->getElementType()); }
-  void VisitFunctionType(const FunctionType *T) { Visit(T->getReturnType()); }
+  void VisitFunctionType(const FunctionType *T) {
+    // SEI: add functionDetails, incl. return type
+    getNodeDelegate().AddChild("functionDetails", [this, T] {
+      getNodeDelegate().VisitFunctionType(T);
+      getNodeDelegate().VisitReturnType(T->getReturnType());
+    });
+  }
+
   void VisitFunctionProtoType(const FunctionProtoType *T) {
+
+    // SEI: visit the function type. this will force the return type info too.
     VisitFunctionType(T);
     for (const QualType &PT : T->getParamTypes())
       Visit(PT);
@@ -560,6 +606,11 @@ class ASTNodeTraverser
       Visit(TSI->getTypeLoc());
     if (D->hasInit())
       Visit(D->getInit());
+
+    // SEI: if this is a function pointer, then we need to get the
+    // FunctionProtoType and then make add'l visits. if the FP is typedef'd,
+    // then this behavior occurs for us outside of Visit(VarDecl *)
+    getNodeDelegate().Visit(D->getType());
   }
 
   void VisitDecompositionDecl(const DecompositionDecl *D) {
diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h
index 9422c8fceccfbd..25cec3abcbae82 100644
--- a/clang/include/clang/AST/JSONNodeDumper.h
+++ b/clang/include/clang/AST/JSONNodeDumper.h
@@ -10,6 +10,22 @@
 // a JSON.
 //
 //===----------------------------------------------------------------------===//
+//
+// Modifications to this file by SEI staff are copyright Carnegie Mellon
+// University and contributed under the Apache License v2.0 with LLVM
+// Exceptions.
+//
+// SEI Contributions are made with funding and support from the Department of
+// Defense under Contract No. FA8702-15-D-0002 with Carnegie Mellon University
+// for the operation of the Software Engineering Institute, a federally funded
+// research and development center.
+//
+// The view, opinions, and/or findings contained in this material are those of
+// the author(s) and should not be construed as an official Government position,
+// policy, or decision, unless designated by other documentation.
+// DM24-0194
+//
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_AST_JSONNODEDUMPER_H
 #define LLVM_CLANG_AST_JSONNODEDUMPER_H
@@ -26,6 +42,9 @@
 #include "clang/AST/Type.h"
 #include "llvm/Support/JSON.h"
 
+// SEI: added for caching addresses of certain visited nodes
+#include <unordered_set>
+
 namespace clang {
 
 class APValue;
@@ -111,8 +130,8 @@ class NodeStreamer {
 // Dumps AST nodes in JSON format. There is no implied stability for the
 // content or format of the dump between major releases of Clang, other than it
 // being valid JSON output. Further, there is no requirement that the
-// information dumped is a complete representation of the AST, only that the
-// information presented is correct.
+// information dumped be a complete representation of the AST, only that the
+// information presented be correct.
 class JSONNodeDumper
     : public ConstAttrVisitor<JSONNodeDumper>,
       public comments::ConstCommentVisitor<JSONNodeDumper, void,
@@ -132,6 +151,9 @@ class JSONNodeDumper
   StringRef LastLocFilename, LastLocPresumedFilename;
   unsigned LastLocLine, LastLocPresumedLine;
 
+  // SEI: caches addresses for QualType nodes that are duplicates
+  std::unordered_set<void *> AddressCache;
+
   using InnerAttrVisitor = ConstAttrVisitor<JSONNodeDumper>;
   using InnerCommentVisitor =
       comments::ConstCommentVisitor<JSONNodeDumper, void,
@@ -184,6 +206,18 @@ class JSONNodeDumper
 
   StringRef getCommentCommandName(unsigned CommandID) const;
 
+  /// SEI: simple cacher for addresses of nodes to reduce
+  /// bloat caused by SEI changes
+  /// Returns true if the address is already cached, otherwise false
+  bool cacheAddress(void *p) {
+    if (AddressCache.find(p) == AddressCache.end()) {
+      AddressCache.insert(p);
+      return false;
+    }
+
+    return true;
+  }
+
 public:
   JSONNodeDumper(raw_ostream &OS, const SourceManager &SrcMgr, ASTContext &Ctx,
                  const PrintingPolicy &PrintPolicy,
@@ -196,6 +230,13 @@ class JSONNodeDumper
   void Visit(const Stmt *Node);
   void Visit(const Type *T);
   void Visit(QualType T);
+
+  // SEI: get specific details from the qual type
+  void VisitQualTypeDetails(QualType T);
+
+  // SEI: traverse ReturnType information
+  void VisitReturnType(QualType T);
+
   void Visit(const Decl *D);
   void Visit(TypeLoc TL);
 
diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h
index 9c320c8ae3e54c..89f0fb110b31a2 100644
--- a/clang/include/clang/AST/TextNodeDumper.h
+++ b/clang/include/clang/AST/TextNodeDumper.h
@@ -9,6 +9,22 @@
 // This file implements AST dumping of components of individual AST nodes.
 //
 //===----------------------------------------------------------------------===//
+//
+// Modifications to this file by SEI staff are copyright Carnegie Mellon
+// University and contributed under the Apache License v2.0 with LLVM
+// Exceptions.
+//
+// SEI Contributions are made with funding and support from the Department of
+// Defense under Contract No. FA8702-15-D-0002 with Carnegie Mellon University
+// for the operation of the Software Engineering Institute, a federally funded
+// research and development center.
+//
+// The view, opinions, and/or findings contained in this material are those of
+// the author(s) and should not be construed as an official Government position,
+// policy, or decision, unless designated by other documentation.
+// DM24-0194
+//
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_AST_TEXTNODEDUMPER_H
 #define LLVM_CLANG_AST_TEXTNODEDUMPER_H
@@ -183,6 +199,12 @@ class TextNodeDumper
 
   void Visit(TypeLoc);
 
+  // SEI: added support for getting ReturnType information
+  void VisitReturnType(QualType T);
+
+  // SEI: added support for more QT details. it's a passthrough for this class
+  void VisitQualTypeDetails(QualType T) {}
+
   void Visit(const Decl *D);
 
   void Visit(const CXXCtorInitializer *Init);
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index ddbe2136a671f3..72b2c2b9b8aa80 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -71,6 +71,19 @@ void JSONNodeDumper::Visit(const Stmt *S) {
 }
 
 void JSONNodeDumper::Visit(const Type *T) {
+  // SEI: ensure FPTs are debloated. this can be expanded to ALL types, if
+  // desired
+  if (T && cacheAddress((void *)T)) {
+    // add this as a child to know that it's a Type
+    AddChild("typeDetails",
+             [=] { JOS.attribute("refId", createPointerRepresentation(T)); });
+
+    InnerTypeVisitor::Visit(T);
+    // SEI
+    VisitQualTypeDetails(T->getCanonicalTypeInternal());
+    return;
+  }
+
   JOS.attribute("id", createPointerRepresentation(T));
 
   if (!T)
@@ -87,13 +100,44 @@ void JSONNodeDumper::Visit(const Type *T) {
                       T->containsUnexpandedParameterPack());
   attributeOnlyIfTrue("isImported", T->isFromAST());
   InnerTypeVisitor::Visit(T);
+  // SEI
+  VisitQualTypeDetails(T->getCanonicalTypeInternal());
 }
 
 void JSONNodeDumper::Visit(QualType T) {
-  JOS.attribute("id", createPointerRepresentation(T.getAsOpaquePtr()));
-  JOS.attribute("kind", "QualType");
-  JOS.attribute("type", createQualType(T));
-  JOS.attribute("qualifiers", T.split().Quals.getAsString());
+
+  // SEI: used AddChild to prevent qualType from being added to a list
+  // JOS.attributeArray("qualTypes", [=] {
+
+  // SEI: force qualType into its own block, otherwise multiple Visits
+  // create a bunch of siblings, which is invalid JSON
+  JOS.attributeBegin("qualType");
+  JOS.objectBegin();
+
+  // SEI: cache visited addresses and add only its refId
+  // instead of the kind, type, quals, but leave the qual type details
+  // because those can differ among IDs
+  if (cacheAddress(T.getAsOpaquePtr())) {
+    JOS.attribute("refId", createPointerRepresentation(T.getAsOpaquePtr()));
+  } else {
+    JOS.attribute("id", createPointerRepresentation(T.getAsOpaquePtr()));
+    JOS.attribute("kind", "QualType");
+    JOS.attribute("type", createQualType(T));
+    JOS.attribute("qualifiers", T.split().Quals.getAsString());
+  }
+
+  // SEI: get add'l info required for redemption analysis
+  // the qual type details differ even among cached references
+  VisitQualTypeDetails(T);
+
+  // SEI: if this is a pointer type, then recursively call ourselves
+  // until it's not
+  if (T->isPointerType())
+    Visit(T->getPointeeType());
+
+  JOS.objectEnd();
+  JOS.attributeEnd();
+  //} );
 }
 
 void JSONNodeDumper::Visit(TypeLoc TL) {
@@ -111,6 +155,64 @@ void JSONNodeDumper::Visit(TypeLoc TL) {
                       [TL, this] { writeSourceRange(TL.getSourceRange()); });
 }
 
+void JSONNodeDumper::VisitQualTypeDetails(QualType T) {
+  // SEI: get more detailed info on type. this info is not transferrable
+  // with the refId, so this must be called on every type even if that type
+  // has been cached
+  JOS.attributeBegin("qualDetails");
+  JOS.arrayBegin();
+
+  auto CT = T->getCanonicalTypeInternal();
+
+  if (CT->isStructureType())
+    JOS.value("struct");
+
+  if (CT->isNullPtrType())
+    JOS.value("null");
+  if (CT->isUndeducedType())
+    JOS.value("undeduced");
+
+  if (CT->isPointerType())
+    JOS.value("ptr");
+  if (CT->isVoidType())
+    JOS.value("void");
+
+  if (CT->isSignedIntegerType())
+    JOS.value("signed");
+  if (CT->isUnsignedIntegerType())
+    JOS.value("unsigned");
+  if (CT->isIntegerType())
+    JOS.value("integer");
+  if (CT->isFloatingType())
+    JOS.value("fpp");
+  if (CT->isEnumeralType())
+    JOS.value("enum");
+  if (CT->isUnionType())
+    JOS.value("union");
+  if (CT->isFunctionPointerType())
+    JOS.value("func_ptr");
+  if (CT->isTypedefNameType())
+    JOS.value("type_def");
+  if (CT->isArrayType())
+    JOS.value("array");
+
+  JOS.arrayEnd();
+  JOS.attributeEnd();
+}
+
+// SEI: capture the return info in a nested JSON block
+void JSONNodeDumper::VisitReturnType(QualType T) {
+  // using this function allows us to easily wrap just the returnType
+  // section into its own JSON block. if we do this in ASTNodeTraverser,
+  // then the TextNodeDumper works as expected but the JSONNodeDumper
+  // rolls all siblings into the returnType node with those siblings as child
+  // nodes
+
+  JOS.attributeObject("returnTypeDetail", [=] { Visit(T); });
+
+  // Visit(T);
+}
+
 void JSONNodeDumper::Visit(const Decl *D) {
   JOS.attribute("id", createPointerRepresentation(D));
 
@@ -175,6 +277,14 @@ void JSONNodeDumper::Visit(const TemplateArgument &TA, SourceRange R,
 
 void JSONNodeDumper::Visit(const CXXCtorInitializer *Init) {
   JOS.attribute("kind", "CXXCtorInitializer");
+
+  // SEI: added id for CXXCtorInitializers
+  JOS.attribute("id", createPointerRepresentation(Init));
+
+  // SEI: added range for CXXCtorInitializers
+  JOS.attributeObject(
+      "range", [Init, this] { writeSourceRange(Init->getSourceRange()); });
+
   if (Init->isAnyMemberInitializer())
     JOS.attribute("anyInit", createBareDeclRef(Init->getAnyMember()));
   else if (Init->isBaseInitializer())
@@ -958,6 +1068,10 @@ void JSONNodeDumper::VisitFieldDecl(const FieldDecl *FD) {
   attributeOnlyIfTrue("modulePrivate", FD->isModulePrivate());
   attributeOnlyIfTrue("isBitfield", FD->isBitField());
   attributeOnlyIfTrue("hasInClassInitializer", FD->hasInClassInitializer());
+
+  // SEI: had to add this in b/c FieldDecls do not seem to call
+  // Visit(QualType)
+  Visit(FD->getType());
 }
 
 void JSONNodeDumper::VisitFunctionDecl(const FunctionDecl *FD) {
@@ -1346,6 +1460,8 @@ void JSONNodeDumper::VisitDeclRefExpr(const DeclRefExpr *DRE) {
   case NOUR_Discarded: JOS.attribute("nonOdrUseReason", "discarded"); break;
   }
   attributeOnlyIfTrue("isImmediateEscalating", DRE->isImmediateEscalating());
+  // SEI: this doesn't call VisitNamedDecl, so we force it
+  Visit(DRE->getType());
 }
 
 void JSONNodeDumper::VisitSYCLUniqueStableNameExpr(
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 15b23d60c3ffab..7d87c4d1b5eeb8 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -241,6 +241,14 @@ void TextNodeDumper::Visit(QualType T) {
   OS << " " << T.split().Quals.getAsString();
 }
 
+void TextNodeDumper::VisitReturnType(QualType T) {
+  OS << "ReturnType";
+  dumpPointer(T.getAsOpaquePtr());
+  OS << " ";
+  dumpBareType(T, false);
+  OS << " " << T.split().Quals.getAsString();
+}
+
 void TextNodeDumper::Visit(TypeLoc TL) {
   if (!TL) {
     ColorScope Color(OS, ShowColors, NullColor);
diff --git a/clang/unittests/AST/ASTTraverserTest.cpp b/clang/unittests/AST/ASTTraverserTest.cpp
index 8b6e3e90c0ea67..5f10b69862171a 100644
--- a/clang/unittests/AST/ASTTraverserTest.cpp
+++ b/clang/unittests/AST/ASTTraverserTest.cpp
@@ -89,6 +89,10 @@ class NodeTreePrinter : public TextTreeStructure {
     }
   }
 
+  // SEI: added for class completeness
+  void VisitFunctionType(const FunctionType *T) {}
+  void VisitReturnType(QualType T) {}
+
   template <typename... T> void Visit(T...) {}
 };
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/111705


More information about the cfe-commits mailing list