[clang] 7b6fe71 - Refactor StaticAnalyzer to use `clang::SarifDocumentWriter`

Aaron Ballman via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 17 11:48:44 PST 2022


Author: Vaibhav Yenamandra
Date: 2022-11-17T14:47:02-05:00
New Revision: 7b6fe711b210a90cbb8facfe5343a0f999de5a0c

URL: https://github.com/llvm/llvm-project/commit/7b6fe711b210a90cbb8facfe5343a0f999de5a0c
DIFF: https://github.com/llvm/llvm-project/commit/7b6fe711b210a90cbb8facfe5343a0f999de5a0c.diff

LOG: Refactor StaticAnalyzer to use `clang::SarifDocumentWriter`

Refactor StaticAnalyzer to use clang::SarifDocumentWriter for
serializing sarif diagnostics.

Uses clang::SarifDocumentWriter to generate SARIF output in the
StaticAnalyzer.

Various bugfixes are also made to clang::SarifDocumentWriter.

Summary of changes:

clang/lib/Basic/Sarif.cpp:
  * Fix a bug in adjustColumnPos introduced by the previous move; it now
    uses FullSourceLoc::getDecomposedExpansionLoc, which provides the
    correct location (in the presence of macros), instead of
    FullSourceLoc::getDecomposedLoc.
  * Fix createTextRegion so that it handles caret ranges correctly,
    this should bring it to parity with the previous implementation.

clang/test/Analysis/diagnostics/Inputs/expected-sarif:
  * Update the schema URL to the official website
  * Add the emitted defaultConfiguration sections to all rules
  * Annotate results with the "level" property

clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp:
  * Update SarifDiagnostics class to hold a clang::SarifDocumentWriter
    that it uses to convert diagnostics to SARIF.

Added: 
    

Modified: 
    clang/include/clang/Basic/SourceLocation.h
    clang/lib/Basic/Sarif.cpp
    clang/lib/Basic/SourceLocation.cpp
    clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
    clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
    clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
    clang/test/Frontend/sarif-diagnostics.cpp
    clang/unittests/Basic/SarifTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h
index 4451523bc9450..c709c1102834b 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -398,6 +398,12 @@ class FullSourceLoc : public SourceLocation {
   unsigned getExpansionLineNumber(bool *Invalid = nullptr) const;
   unsigned getExpansionColumnNumber(bool *Invalid = nullptr) const;
 
+  /// Decompose the underlying \c SourceLocation into a raw (FileID + Offset)
+  /// pair, after walking through all expansion records.
+  ///
+  /// \see SourceManager::getDecomposedExpansionLoc
+  std::pair<FileID, unsigned> getDecomposedExpansionLoc() const;
+
   unsigned getSpellingLineNumber(bool *Invalid = nullptr) const;
   unsigned getSpellingColumnNumber(bool *Invalid = nullptr) const;
 

diff  --git a/clang/lib/Basic/Sarif.cpp b/clang/lib/Basic/Sarif.cpp
index 73be86b57df77..b3bee674225d2 100644
--- a/clang/lib/Basic/Sarif.cpp
+++ b/clang/lib/Basic/Sarif.cpp
@@ -118,7 +118,7 @@ static unsigned int adjustColumnPos(FullSourceLoc Loc,
                                     unsigned int TokenLen = 0) {
   assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
 
-  std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
+  std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc();
   Optional<MemoryBufferRef> Buf =
       Loc.getManager().getBufferOrNone(LocInfo.first);
   assert(Buf && "got an invalid buffer for the location's file");
@@ -149,13 +149,16 @@ json::Object createMessage(StringRef Text) {
 /// \pre CharSourceRange must be a token range
 static json::Object createTextRegion(const SourceManager &SM,
                                      const CharSourceRange &R) {
-  FullSourceLoc FirstTokenLoc{R.getBegin(), SM};
-  FullSourceLoc LastTokenLoc{R.getEnd(), SM};
-  json::Object Region{{"startLine", FirstTokenLoc.getExpansionLineNumber()},
-                      {"startColumn", adjustColumnPos(FirstTokenLoc)},
-                      {"endColumn", adjustColumnPos(LastTokenLoc)}};
-  if (FirstTokenLoc != LastTokenLoc) {
-    Region["endLine"] = LastTokenLoc.getExpansionLineNumber();
+  FullSourceLoc BeginCharLoc{R.getBegin(), SM};
+  FullSourceLoc EndCharLoc{R.getEnd(), SM};
+  json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()},
+                      {"startColumn", adjustColumnPos(BeginCharLoc)}};
+
+  if (BeginCharLoc == EndCharLoc) {
+    Region["endColumn"] = adjustColumnPos(BeginCharLoc);
+  } else {
+    Region["endLine"] = EndCharLoc.getExpansionLineNumber();
+    Region["endColumn"] = adjustColumnPos(EndCharLoc);
   }
   return Region;
 }
@@ -232,8 +235,10 @@ SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
   }
   assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
   const SarifArtifactLocation &Location = I->second.Location;
-  uint32_t Idx = Location.Index.value();
-  return json::Object{{{"artifactLocation", json::Object{{{"index", Idx}}}},
+  json::Object ArtifactLocationObject{{"uri", Location.URI}};
+  if (Location.Index.has_value())
+    ArtifactLocationObject["index"] = Location.Index.value();
+  return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)},
                        {"region", createTextRegion(SourceMgr, R)}}};
 }
 

diff  --git a/clang/lib/Basic/SourceLocation.cpp b/clang/lib/Basic/SourceLocation.cpp
index f9ecd52e5f27b..772f2e612f074 100644
--- a/clang/lib/Basic/SourceLocation.cpp
+++ b/clang/lib/Basic/SourceLocation.cpp
@@ -166,6 +166,10 @@ FullSourceLoc FullSourceLoc::getExpansionLoc() const {
   return FullSourceLoc(SrcMgr->getExpansionLoc(*this), *SrcMgr);
 }
 
+std::pair<FileID, unsigned> FullSourceLoc::getDecomposedExpansionLoc() const {
+  return SrcMgr->getDecomposedExpansionLoc(*this);
+}
+
 FullSourceLoc FullSourceLoc::getSpellingLoc() const {
   assert(isValid());
   return FullSourceLoc(SrcMgr->getSpellingLoc(*this), *SrcMgr);

diff  --git a/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
index ad31107925929..fab520098f13e 100644
--- a/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
@@ -13,6 +13,8 @@
 #include "clang/Analysis/MacroExpansionContext.h"
 #include "clang/Analysis/PathDiagnostic.h"
 #include "clang/Basic/FileManager.h"
+#include "clang/Basic/Sarif.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Version.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
@@ -30,10 +32,12 @@ namespace {
 class SarifDiagnostics : public PathDiagnosticConsumer {
   std::string OutputFile;
   const LangOptions &LO;
+  SarifDocumentWriter SarifWriter;
 
 public:
-  SarifDiagnostics(const std::string &Output, const LangOptions &LO)
-      : OutputFile(Output), LO(LO) {}
+  SarifDiagnostics(const std::string &Output, const LangOptions &LO,
+                   const SourceManager &SM)
+      : OutputFile(Output), LO(LO), SarifWriter(SM) {}
   ~SarifDiagnostics() override = default;
 
   void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
@@ -56,250 +60,12 @@ void ento::createSarifDiagnosticConsumer(
   if (Output.empty())
     return;
 
-  C.push_back(new SarifDiagnostics(Output, PP.getLangOpts()));
+  C.push_back(
+      new SarifDiagnostics(Output, PP.getLangOpts(), PP.getSourceManager()));
   createTextMinimalPathDiagnosticConsumer(std::move(DiagOpts), C, Output, PP,
                                           CTU, MacroExpansions);
 }
 
-static StringRef getFileName(const FileEntry &FE) {
-  StringRef Filename = FE.tryGetRealPathName();
-  if (Filename.empty())
-    Filename = FE.getName();
-  return Filename;
-}
-
-static std::string percentEncodeURICharacter(char C) {
-  // RFC 3986 claims alpha, numeric, and this handful of
-  // characters are not reserved for the path component and
-  // should be written out directly. Otherwise, percent
-  // encode the character and write that out instead of the
-  // reserved character.
-  if (llvm::isAlnum(C) ||
-      StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
-    return std::string(&C, 1);
-  return "%" + llvm::toHex(StringRef(&C, 1));
-}
-
-static std::string fileNameToURI(StringRef Filename) {
-  llvm::SmallString<32> Ret = StringRef("file://");
-
-  // Get the root name to see if it has a URI authority.
-  StringRef Root = sys::path::root_name(Filename);
-  if (Root.startswith("//")) {
-    // There is an authority, so add it to the URI.
-    Ret += Root.drop_front(2).str();
-  } else if (!Root.empty()) {
-    // There is no authority, so end the component and add the root to the URI.
-    Ret += Twine("/" + Root).str();
-  }
-
-  auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
-  assert(Iter != End && "Expected there to be a non-root path component.");
-  // Add the rest of the path components, encoding any reserved characters;
-  // we skip past the first path component, as it was handled it above.
-  for (StringRef Component : llvm::make_range(++Iter, End)) {
-    // For reasons unknown to me, we may get a backslash with Windows native
-    // paths for the initial backslash following the drive component, which
-    // we need to ignore as a URI path part.
-    if (Component == "\\")
-      continue;
-
-    // Add the separator between the previous path part and the one being
-    // currently processed.
-    Ret += "/";
-
-    // URI encode the part.
-    for (char C : Component) {
-      Ret += percentEncodeURICharacter(C);
-    }
-  }
-
-  return std::string(Ret);
-}
-
-static json::Object createArtifactLocation(const FileEntry &FE) {
-  return json::Object{{"uri", fileNameToURI(getFileName(FE))}};
-}
-
-static json::Object createArtifact(const FileEntry &FE) {
-  return json::Object{{"location", createArtifactLocation(FE)},
-                      {"roles", json::Array{"resultFile"}},
-                      {"length", FE.getSize()},
-                      {"mimeType", "text/plain"}};
-}
-
-static json::Object createArtifactLocation(const FileEntry &FE,
-                                           json::Array &Artifacts) {
-  std::string FileURI = fileNameToURI(getFileName(FE));
-
-  // See if the Artifacts array contains this URI already. If it does not,
-  // create a new artifact object to add to the array.
-  auto I = llvm::find_if(Artifacts, [&](const json::Value &File) {
-    if (const json::Object *Obj = File.getAsObject()) {
-      if (const json::Object *FileLoc = Obj->getObject("location")) {
-        Optional<StringRef> URI = FileLoc->getString("uri");
-        return URI && URI->equals(FileURI);
-      }
-    }
-    return false;
-  });
-
-  // Calculate the index within the artifact array so it can be stored in
-  // the JSON object.
-  auto Index = static_cast<unsigned>(std::distance(Artifacts.begin(), I));
-  if (I == Artifacts.end())
-    Artifacts.push_back(createArtifact(FE));
-
-  return json::Object{{"uri", FileURI}, {"index", Index}};
-}
-
-static unsigned int adjustColumnPos(const SourceManager &SM, SourceLocation Loc,
-                                    unsigned int TokenLen = 0) {
-  assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
-
-  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedExpansionLoc(Loc);
-  assert(LocInfo.second > SM.getExpansionColumnNumber(Loc) &&
-         "position in file is before column number?");
-
-  Optional<MemoryBufferRef> Buf = SM.getBufferOrNone(LocInfo.first);
-  assert(Buf && "got an invalid buffer for the location's file");
-  assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
-         "token extends past end of buffer?");
-
-  // Adjust the offset to be the start of the line, since we'll be counting
-  // Unicode characters from there until our column offset.
-  unsigned int Off = LocInfo.second - (SM.getExpansionColumnNumber(Loc) - 1);
-  unsigned int Ret = 1;
-  while (Off < (LocInfo.second + TokenLen)) {
-    Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
-    Ret++;
-  }
-
-  return Ret;
-}
-
-static json::Object createTextRegion(const LangOptions &LO, SourceRange R,
-                                     const SourceManager &SM) {
-  json::Object Region{
-      {"startLine", SM.getExpansionLineNumber(R.getBegin())},
-      {"startColumn", adjustColumnPos(SM, R.getBegin())},
-  };
-  if (R.getBegin() == R.getEnd()) {
-    Region["endColumn"] = adjustColumnPos(SM, R.getBegin());
-  } else {
-    Region["endLine"] = SM.getExpansionLineNumber(R.getEnd());
-    Region["endColumn"] = adjustColumnPos(
-        SM, R.getEnd(),
-        Lexer::MeasureTokenLength(R.getEnd(), SM, LO));
-  }
-  return Region;
-}
-
-static json::Object createPhysicalLocation(const LangOptions &LO,
-                                           SourceRange R, const FileEntry &FE,
-                                           const SourceManager &SMgr,
-                                           json::Array &Artifacts) {
-  return json::Object{
-      {{"artifactLocation", createArtifactLocation(FE, Artifacts)},
-       {"region", createTextRegion(LO, R, SMgr)}}};
-}
-
-enum class Importance { Important, Essential, Unimportant };
-
-static StringRef importanceToStr(Importance I) {
-  switch (I) {
-  case Importance::Important:
-    return "important";
-  case Importance::Essential:
-    return "essential";
-  case Importance::Unimportant:
-    return "unimportant";
-  }
-  llvm_unreachable("Fully covered switch is not so fully covered");
-}
-
-static json::Object createThreadFlowLocation(json::Object &&Location,
-                                             Importance I) {
-  return json::Object{{"location", std::move(Location)},
-                      {"importance", importanceToStr(I)}};
-}
-
-static json::Object createMessage(StringRef Text) {
-  return json::Object{{"text", Text.str()}};
-}
-
-static json::Object createLocation(json::Object &&PhysicalLocation,
-                                   StringRef Message = "") {
-  json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
-  if (!Message.empty())
-    Ret.insert({"message", createMessage(Message)});
-  return Ret;
-}
-
-static Importance calculateImportance(const PathDiagnosticPiece &Piece) {
-  switch (Piece.getKind()) {
-  case PathDiagnosticPiece::Call:
-  case PathDiagnosticPiece::Macro:
-  case PathDiagnosticPiece::Note:
-  case PathDiagnosticPiece::PopUp:
-    // FIXME: What should be reported here?
-    break;
-  case PathDiagnosticPiece::Event:
-    return Piece.getTagStr() == "ConditionBRVisitor" ? Importance::Important
-                                                     : Importance::Essential;
-  case PathDiagnosticPiece::ControlFlow:
-    return Importance::Unimportant;
-  }
-  return Importance::Unimportant;
-}
-
-static json::Object createThreadFlow(const LangOptions &LO,
-                                     const PathPieces &Pieces,
-                                     json::Array &Artifacts) {
-  const SourceManager &SMgr = Pieces.front()->getLocation().getManager();
-  json::Array Locations;
-  for (const auto &Piece : Pieces) {
-    const PathDiagnosticLocation &P = Piece->getLocation();
-    Locations.push_back(createThreadFlowLocation(
-        createLocation(createPhysicalLocation(
-                           LO, P.asRange(),
-                           *P.asLocation().getExpansionLoc().getFileEntry(),
-                           SMgr, Artifacts),
-                       Piece->getString()),
-        calculateImportance(*Piece)));
-  }
-  return json::Object{{"locations", std::move(Locations)}};
-}
-
-static json::Object createCodeFlow(const LangOptions &LO,
-                                   const PathPieces &Pieces,
-                                   json::Array &Artifacts) {
-  return json::Object{
-      {"threadFlows", json::Array{createThreadFlow(LO, Pieces, Artifacts)}}};
-}
-
-static json::Object createResult(const LangOptions &LO,
-                                 const PathDiagnostic &Diag,
-                                 json::Array &Artifacts,
-                                 const StringMap<unsigned> &RuleMapping) {
-  const PathPieces &Path = Diag.path.flatten(false);
-  const SourceManager &SMgr = Path.front()->getLocation().getManager();
-
-  auto Iter = RuleMapping.find(Diag.getCheckerName());
-  assert(Iter != RuleMapping.end() && "Rule ID is not in the array index map?");
-
-  return json::Object{
-      {"message", createMessage(Diag.getVerboseDescription())},
-      {"codeFlows", json::Array{createCodeFlow(LO, Path, Artifacts)}},
-      {"locations",
-       json::Array{createLocation(createPhysicalLocation(
-           LO, Diag.getLocation().asRange(),
-           *Diag.getLocation().asLocation().getExpansionLoc().getFileEntry(),
-           SMgr, Artifacts))}},
-      {"ruleIndex", Iter->getValue()},
-      {"ruleId", Diag.getCheckerName()}};
-}
-
 static StringRef getRuleDescription(StringRef CheckName) {
   return llvm::StringSwitch<StringRef>(CheckName)
 #define GET_CHECKERS
@@ -322,60 +88,99 @@ static StringRef getRuleHelpURIStr(StringRef CheckName) {
       ;
 }
 
-static json::Object createRule(const PathDiagnostic &Diag) {
-  StringRef CheckName = Diag.getCheckerName();
-  json::Object Ret{
-      {"fullDescription", createMessage(getRuleDescription(CheckName))},
-      {"name", CheckName},
-      {"id", CheckName}};
-
-  std::string RuleURI = std::string(getRuleHelpURIStr(CheckName));
-  if (!RuleURI.empty())
-    Ret["helpUri"] = RuleURI;
-
-  return Ret;
+static ThreadFlowImportance
+calculateImportance(const PathDiagnosticPiece &Piece) {
+  switch (Piece.getKind()) {
+  case PathDiagnosticPiece::Call:
+  case PathDiagnosticPiece::Macro:
+  case PathDiagnosticPiece::Note:
+  case PathDiagnosticPiece::PopUp:
+    // FIXME: What should be reported here?
+    break;
+  case PathDiagnosticPiece::Event:
+    return Piece.getTagStr() == "ConditionBRVisitor"
+               ? ThreadFlowImportance::Important
+               : ThreadFlowImportance::Essential;
+  case PathDiagnosticPiece::ControlFlow:
+    return ThreadFlowImportance::Unimportant;
+  }
+  return ThreadFlowImportance::Unimportant;
+}
+
+/// Accepts a SourceRange corresponding to a pair of the first and last tokens
+/// and converts to a Character granular CharSourceRange.
+static CharSourceRange convertTokenRangeToCharRange(const SourceRange &R,
+                                                    const SourceManager &SM,
+                                                    const LangOptions &LO) {
+  // Caret diagnostics have the first and last locations pointed at the same
+  // location, return these as-is.
+  if (R.getBegin() == R.getEnd())
+    return CharSourceRange::getCharRange(R);
+
+  SourceLocation BeginCharLoc = R.getBegin();
+  // For token ranges, the raw end SLoc points at the first character of the
+  // last token in the range. This must be moved to one past the end of the
+  // last character using the lexer.
+  SourceLocation EndCharLoc =
+      Lexer::getLocForEndOfToken(R.getEnd(), /* Offset = */ 0, SM, LO);
+  return CharSourceRange::getCharRange(BeginCharLoc, EndCharLoc);
+}
+
+static SmallVector<ThreadFlow, 8> createThreadFlows(const PathDiagnostic *Diag,
+                                                    const LangOptions &LO) {
+  SmallVector<ThreadFlow, 8> Flows;
+  const PathPieces &Pieces = Diag->path.flatten(false);
+  for (const auto &Piece : Pieces) {
+    auto Range = convertTokenRangeToCharRange(
+        Piece->getLocation().asRange(), Piece->getLocation().getManager(), LO);
+    auto Flow = ThreadFlow::create()
+                    .setImportance(calculateImportance(*Piece))
+                    .setRange(Range)
+                    .setMessage(Piece->getString());
+    Flows.push_back(Flow);
+  }
+  return Flows;
 }
 
-static json::Array createRules(std::vector<const PathDiagnostic *> &Diags,
-                               StringMap<unsigned> &RuleMapping) {
-  json::Array Rules;
+static StringMap<uint32_t>
+createRuleMapping(const std::vector<const PathDiagnostic *> &Diags,
+                  SarifDocumentWriter &SarifWriter) {
+  StringMap<uint32_t> RuleMapping;
   llvm::StringSet<> Seen;
 
   for (const PathDiagnostic *D : Diags) {
-    StringRef RuleID = D->getCheckerName();
-    std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(RuleID);
+    StringRef CheckName = D->getCheckerName();
+    std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(CheckName);
     if (P.second) {
-      RuleMapping[RuleID] = Rules.size(); // Maps RuleID to an Array Index.
-      Rules.push_back(createRule(*D));
+      auto Rule = SarifRule::create()
+                      .setName(CheckName)
+                      .setRuleId(CheckName)
+                      .setDescription(getRuleDescription(CheckName))
+                      .setHelpURI(getRuleHelpURIStr(CheckName));
+      size_t RuleIdx = SarifWriter.createRule(Rule);
+      RuleMapping[CheckName] = RuleIdx;
     }
   }
-
-  return Rules;
+  return RuleMapping;
 }
 
-static json::Object createTool(std::vector<const PathDiagnostic *> &Diags,
-                               StringMap<unsigned> &RuleMapping) {
-  return json::Object{
-      {"driver", json::Object{{"name", "clang"},
-                              {"fullName", "clang static analyzer"},
-                              {"language", "en-US"},
-                              {"version", getClangFullVersion()},
-                              {"rules", createRules(Diags, RuleMapping)}}}};
-}
-
-static json::Object createRun(const LangOptions &LO,
-                              std::vector<const PathDiagnostic *> &Diags) {
-  json::Array Results, Artifacts;
-  StringMap<unsigned> RuleMapping;
-  json::Object Tool = createTool(Diags, RuleMapping);
+static SarifResult createResult(const PathDiagnostic *Diag,
+                                const StringMap<uint32_t> &RuleMapping,
+                                const LangOptions &LO) {
 
-  for (const PathDiagnostic *D : Diags)
-    Results.push_back(createResult(LO, *D, Artifacts, RuleMapping));
+  StringRef CheckName = Diag->getCheckerName();
+  uint32_t RuleIdx = RuleMapping.lookup(CheckName);
+  auto Range = convertTokenRangeToCharRange(
+      Diag->getLocation().asRange(), Diag->getLocation().getManager(), LO);
 
-  return json::Object{{"tool", std::move(Tool)},
-                      {"results", std::move(Results)},
-                      {"artifacts", std::move(Artifacts)},
-                      {"columnKind", "unicodeCodePoints"}};
+  SmallVector<ThreadFlow, 8> Flows = createThreadFlows(Diag, LO);
+  auto Result = SarifResult::create(RuleIdx)
+                    .setRuleId(CheckName)
+                    .setDiagnosticMessage(Diag->getVerboseDescription())
+                    .setDiagnosticLevel(SarifResultLevel::Warning)
+                    .setLocations({Range})
+                    .setThreadFlows(Flows);
+  return Result;
 }
 
 void SarifDiagnostics::FlushDiagnosticsImpl(
@@ -391,10 +196,14 @@ void SarifDiagnostics::FlushDiagnosticsImpl(
     llvm::errs() << "warning: could not create file: " << EC.message() << '\n';
     return;
   }
-  json::Object Sarif{
-      {"$schema",
-       "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"},
-      {"version", "2.1.0"},
-      {"runs", json::Array{createRun(LO, Diags)}}};
-  OS << llvm::formatv("{0:2}\n", json::Value(std::move(Sarif)));
+
+  std::string ToolVersion = getClangFullVersion();
+  SarifWriter.createRun("clang", "clang static analyzer", ToolVersion);
+  StringMap<uint32_t> RuleMapping = createRuleMapping(Diags, SarifWriter);
+  for (const PathDiagnostic *D : Diags) {
+    SarifResult Result = createResult(D, RuleMapping, LO);
+    SarifWriter.appendResult(Result);
+  }
+  auto Document = SarifWriter.createDocument();
+  OS << llvm::formatv("{0:2}\n", json::Value(std::move(Document)));
 }

diff  --git a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
index 2deb1bd90c259..0ac96cacbed19 100644
--- a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
+++ b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
@@ -1,11 +1,12 @@
 {
-  "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
+  "$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json",
   "runs": [
     {
       "artifacts": [
         {
           "length": 434,
           "location": {
+            "index": 0,
           },
           "mimeType": "text/plain",
           "roles": [
@@ -64,6 +65,7 @@
               ]
             }
           ],
+          "level": "warning",
           "locations": [
             {
               "physicalLocation": {
@@ -89,10 +91,16 @@
       "tool": {
         "driver": {
           "fullName": "clang static analyzer",
+          "informationUri": "https://clang.llvm.org/docs/UsersManual.html",
           "language": "en-US",
           "name": "clang",
           "rules": [
             {
+              "defaultConfiguration": {
+                "enabled": true,
+                "level": "warning",
+                "rank": -1
+              },
               "fullDescription": {
                 "text": "Mark tainted symbols as such."
               },

diff  --git a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
index 9005a3654ad0e..ae49ad6604cb2 100644
--- a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
+++ b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
@@ -1,11 +1,12 @@
 {
-  "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
+  "$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json",
   "runs": [
     {
       "artifacts": [
         {
           "length": 1081,
           "location": {
+            "index": 0,
           },
           "mimeType": "text/plain",
           "roles": [
@@ -64,6 +65,7 @@
               ]
             }
           ],
+          "level": "warning",
           "locations": [
             {
               "physicalLocation": {
@@ -153,6 +155,7 @@
               ]
             }
           ],
+          "level": "warning",
           "locations": [
             {
               "physicalLocation": {
@@ -240,6 +243,7 @@
               ]
             }
           ],
+          "level": "warning",
           "locations": [
             {
               "physicalLocation": {
@@ -345,6 +349,7 @@
               ]
             }
           ],
+          "level": "warning",
           "locations": [
             {
               "physicalLocation": {
@@ -413,6 +418,7 @@
               ]
             }
           ],
+          "level": "warning",
           "locations": [
             {
               "physicalLocation": {
@@ -437,10 +443,16 @@
       "tool": {
         "driver": {
           "fullName": "clang static analyzer",
+          "informationUri": "https://clang.llvm.org/docs/UsersManual.html",
           "language": "en-US",
           "name": "clang",
           "rules": [
             {
+              "defaultConfiguration": {
+                "enabled": true,
+                "level": "warning",
+                "rank": -1
+              },
               "fullDescription": {
                 "text": "Mark tainted symbols as such."
               },
@@ -448,6 +460,11 @@
               "name": "debug.TaintTest"
             },
             {
+              "defaultConfiguration": {
+                "enabled": true,
+                "level": "warning",
+                "rank": -1
+              },
               "fullDescription": {
                 "text": "Check for logical errors for function calls and Objective-C message expressions (e.g., uninitialized arguments, null function pointers)"
               },
@@ -456,6 +473,11 @@
               "name": "core.CallAndMessage"
             },
             {
+              "defaultConfiguration": {
+                "enabled": true,
+                "level": "warning",
+                "rank": -1
+              },
               "fullDescription": {
                 "text": "Check for division by zero"
               },
@@ -464,6 +486,11 @@
               "name": "core.DivideZero"
             },
             {
+              "defaultConfiguration": {
+                "enabled": true,
+                "level": "warning",
+                "rank": -1
+              },
               "fullDescription": {
                 "text": "Check for memory leaks, double free, and use-after-free problems. Traces memory managed by malloc()/free()."
               },

diff  --git a/clang/test/Frontend/sarif-diagnostics.cpp b/clang/test/Frontend/sarif-diagnostics.cpp
index 9e390285b30dd..3a35131cdb004 100644
--- a/clang/test/Frontend/sarif-diagnostics.cpp
+++ b/clang/test/Frontend/sarif-diagnostics.cpp
@@ -33,25 +33,25 @@ void f1(t1 x, t1 y) {
 // CHECK: ,"location":{"index":0,"uri":"file://
 // Omit filepath to llvm project directory
 // CHECK: clang/test/Frontend/sarif-diagnostics.cpp"},"mimeType":"text/plain","roles":["resultFile"]}],"columnKind":"unicodeCodePoints","results":
-// CHECK: [{"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":
+// CHECK: [{"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0,"uri":"file://
 // CHECK: {"endColumn":1,"startColumn":1,"startLine":12}}}],"message":{"text":"'main' must return 'int'"},"ruleId":"{{[0-9]+}}","ruleIndex":0},
-// CHECK: {"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":
+// CHECK: {"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0,"uri":"file://
 // CHECK: {"endColumn":11,"startColumn":11,"startLine":13}}}],"message":{"text":"use of undeclared identifier
 // CHECK: 'hello'"},"ruleId":"{{[0-9]+}}","ruleIndex":1},{"level":"error","locations":[{"physicalLocation":{"artifactLocation":
-// CHECK: {"index":0},"region":{"endColumn":17,"startColumn":17,"startLine":15}}}],"message":{"text":"invalid digit 'a' in decimal
+// CHECK: {"index":0,"uri":"file://{{.+}}"},"region":{"endColumn":17,"startColumn":17,"startLine":15}}}],"message":{"text":"invalid digit 'a' in decimal
 // CHECK: constant"},"ruleId":"{{[0-9]+}}","ruleIndex":2},{"level":"warning","locations":[{"physicalLocation":{"artifactLocation":
-// CHECK: {"index":0},"region":{"endColumn":5,"startColumn":5,"startLine":19}}}],"message":{"text":"misleading indentation; statement is not part
+// CHECK: {"index":0,"uri":"file://{{.+}}"},"region":{"endColumn":5,"startColumn":5,"startLine":19}}}],"message":{"text":"misleading indentation; statement is not part
 // CHECK: of the previous 'if'"},"ruleId":"{{[0-9]+}}","ruleIndex":3},{"level":"note","locations":[{"physicalLocation":{"artifactLocation":
-// CHECK: {"index":0},"region":{"endColumn":3,"startColumn":3,"startLine":17}}}],"message":{"text":"previous statement is
+// CHECK: {"index":0,"uri":"file://{{.+}}"},"region":{"endColumn":3,"startColumn":3,"startLine":17}}}],"message":{"text":"previous statement is
 // CHECK: here"},"ruleId":"{{[0-9]+}}","ruleIndex":4},{"level":"warning","locations":[{"physicalLocation":{"artifactLocation":
-// CHECK: {"index":0},"region":{"endColumn":10,"startColumn":10,"startLine":18}}}],"message":{"text":"unused variable
-// CHECK: 'Yes'"},"ruleId":"{{[0-9]+}}","ruleIndex":5},{"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":
+// CHECK: {"index":0,"uri":"file://{{.+}}"},"region":{"endColumn":10,"startColumn":10,"startLine":18}}}],"message":{"text":"unused variable
+// CHECK: 'Yes'"},"ruleId":"{{[0-9]+}}","ruleIndex":5},{"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0,"uri":"file://
 // CHECK: {"endColumn":12,"startColumn":12,"startLine":21}}}],"message":{"text":"use of undeclared identifier
-// CHECK: 'hi'"},"ruleId":"{{[0-9]+}}","ruleIndex":6},{"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":
+// CHECK: 'hi'"},"ruleId":"{{[0-9]+}}","ruleIndex":6},{"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0,"uri":"file://
 // CHECK: {"endColumn":1,"startColumn":1,"startLine":23}}}],"message":{"text":"extraneous closing brace
-// CHECK: ('}')"},"ruleId":"{{[0-9]+}}","ruleIndex":7},{"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":
-// CHECK: {"endColumn":6,"endLine":27,"startColumn":5,"startLine":27}}},{"physicalLocation":{"artifactLocation":{"index":0},"region":
-// CHECK: {"endColumn":10,"endLine":27,"startColumn":9,"startLine":27}}},{"physicalLocation":{"artifactLocation":{"index":0},"region":
+// CHECK: ('}')"},"ruleId":"{{[0-9]+}}","ruleIndex":7},{"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0,"uri":"file://
+// CHECK: {"endColumn":6,"endLine":27,"startColumn":5,"startLine":27}}},{"physicalLocation":{"artifactLocation":{"index":0,"uri":"file://
+// CHECK: {"endColumn":10,"endLine":27,"startColumn":9,"startLine":27}}},{"physicalLocation":{"artifactLocation":{"index":0,"uri":"file://
 // CHECK: {"endColumn":7,"startColumn":7,"startLine":27}}}],"message":{"text":"invalid operands to binary expression ('t1' and
 // CHECK: 't1')"},"ruleId":"{{[0-9]+}}","ruleIndex":8}],"tool":{"driver":{"fullName":"","informationUri":"https://clang.llvm.org/docs/
 // CHECK: UsersManual.html","language":"en-US","name":"clang","rules":[{"defaultConfiguration":

diff  --git a/clang/unittests/Basic/SarifTest.cpp b/clang/unittests/Basic/SarifTest.cpp
index fb68f4a4b99b8..9bbed582f50b6 100644
--- a/clang/unittests/Basic/SarifTest.cpp
+++ b/clang/unittests/Basic/SarifTest.cpp
@@ -292,7 +292,7 @@ TEST_F(SarifDocumentWriterTest, checkSerializingResultsWithCustomRuleConfig) {
 TEST_F(SarifDocumentWriterTest, checkSerializingArtifacts) {
   // GIVEN:
   const std::string ExpectedOutput =
-      R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[{"length":40,"location":{"index":0,"uri":"file:///main.cpp"},"mimeType":"text/plain","roles":["resultFile"]}],"columnKind":"unicodeCodePoints","results":[{"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":{"endColumn":14,"startColumn":14,"startLine":3}}}],"message":{"text":"expected ';' after top level declarator"},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"defaultConfiguration":{"enabled":true,"level":"warning","rank":-1},"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})";
+      R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[{"length":40,"location":{"index":0,"uri":"file:///main.cpp"},"mimeType":"text/plain","roles":["resultFile"]}],"columnKind":"unicodeCodePoints","results":[{"level":"error","locations":[{"physicalLocation":{"artifactLocation":{"index":0,"uri":"file:///main.cpp"},"region":{"endColumn":14,"startColumn":14,"startLine":3}}}],"message":{"text":"expected ';' after top level declarator"},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"defaultConfiguration":{"enabled":true,"level":"warning","rank":-1},"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})";
 
   SarifDocumentWriter Writer{SourceMgr};
   const SarifRule &Rule =
@@ -332,7 +332,7 @@ TEST_F(SarifDocumentWriterTest, checkSerializingArtifacts) {
 TEST_F(SarifDocumentWriterTest, checkSerializingCodeflows) {
   // GIVEN:
   const std::string ExpectedOutput =
-      R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[{"length":27,"location":{"index":1,"uri":"file:///test-header-1.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":30,"location":{"index":2,"uri":"file:///test-header-2.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":28,"location":{"index":3,"uri":"file:///test-header-3.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":41,"location":{"index":0,"uri":"file:///main.cpp"},"mimeType":"text/plain","roles":["resultFile"]}],"columnKind":"unicodeCodePoints","results":[{"codeFlows":[{"threadFlows":[{"locations":[{"importance":"essential","location":{"message":{"text":"Message #1"},"physicalLocation":{"artifactLocation":{"index":1},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}},{"importance":"important","location":{"message":{"text":"Message #2"},"physicalLocation":{"artifactLocation":{"index":2},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}},{"importance":"unimportant","location":{"message":{"text":"Message #3"},"physicalLocation":{"artifactLocation":{"index":3},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}}]}]}],"level":"warning","locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":{"endColumn":8,"endLine":2,"startColumn":5,"startLine":2}}}],"message":{"text":"Redefinition of 'foo'"},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"defaultConfiguration":{"enabled":true,"level":"warning","rank":-1},"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})";
+      R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[{"length":27,"location":{"index":1,"uri":"file:///test-header-1.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":30,"location":{"index":2,"uri":"file:///test-header-2.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":28,"location":{"index":3,"uri":"file:///test-header-3.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":41,"location":{"index":0,"uri":"file:///main.cpp"},"mimeType":"text/plain","roles":["resultFile"]}],"columnKind":"unicodeCodePoints","results":[{"codeFlows":[{"threadFlows":[{"locations":[{"importance":"essential","location":{"message":{"text":"Message #1"},"physicalLocation":{"artifactLocation":{"index":1,"uri":"file:///test-header-1.h"},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}},{"importance":"important","location":{"message":{"text":"Message #2"},"physicalLocation":{"artifactLocation":{"index":2,"uri":"file:///test-header-2.h"},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}},{"importance":"unimportant","location":{"message":{"text":"Message #3"},"physicalLocation":{"artifactLocation":{"index":3,"uri":"file:///test-header-3.h"},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}}]}]}],"level":"warning","locations":[{"physicalLocation":{"artifactLocation":{"index":0,"uri":"file:///main.cpp"},"region":{"endColumn":8,"endLine":2,"startColumn":5,"startLine":2}}}],"message":{"text":"Redefinition of 'foo'"},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"defaultConfiguration":{"enabled":true,"level":"warning","rank":-1},"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})";
 
   const char *SourceText = "int foo = 0;\n"
                            "int foo = 1;\n"


        


More information about the cfe-commits mailing list