[clang] [ssaf][UnsafeBufferUsage] Add JSON serialization for UnsafeBufferUsage (PR #187156)

Fri Mar 27 07:37:22 PDT 2026

================
@@ -184,6 +186,128 @@ TEST_F(UnsafeBufferUsageTest, UnsafeBufferUsageEntityPointerLevelSetTest) {
   EXPECT_THAT(getSubsetOf(Set, E3), UnorderedElementsAre(P5));
 }
 
+//////////////////////////////////////////////////////////////
+//                     JSON Tests                           //
+//////////////////////////////////////////////////////////////
+// Oracle JSON output for the example:
+// void foo(int ***p, int ****q, int x) {
+//   p[5][5][5];
+//   q[5][5][5][5];
+// }
+constexpr const char *const SerilizationTestOracle = R"cpp({
+  "UnsafeBuffers": [
+    [
+      {
+        "@": 42
+      },
+      1
+    ],
+    [
+      {
+        "@": 42
+      },
+      2
+    ],
+    [
+      {
+        "@": 42
+      },
+      3
+    ],
+    [
+      {
+        "@": 108
+      },
+      1
+    ],
+    [
+      {
+        "@": 108
+      },
+      2
+    ],
+    [
+      {
+        "@": 108
+      },
+      3
+    ],
+    [
+      {
+        "@": 108
+      },
+      4
+    ]
+  ]
+})cpp";
+
+TEST_F(UnsafeBufferUsageTest, UnsafeBufferUsageSerializeTest) {
+  auto Sum = setUpTest(R"cpp(
+    void foo(int ***p, int ****q, int x) {
+      p[5][5][5];
+      q[5][5][5][5];
+    }
+  )cpp",
+                       "foo");
+  ASSERT_NE(Sum, nullptr);
+  EXPECT_EQ(*Sum, makeSet(__LINE__, {{"p", 1U},
+                                     {"p", 2U},
+                                     {"p", 3U},
+                                     {"q", 1U},
+                                     {"q", 2U},
+                                     {"q", 3U},
+                                     {"q", 4U}}));
+
+  using Object = llvm::json::Object;
+
+  std::map<EntityId, uint64_t> DummyTable{{*getEntityId("p"), 42},
+                                          {*getEntityId("q"), 108}};
+  Object JData = UnsafeBufferUsageEntitySummary::jsonSerializeFn(
+      *Sum,
+      [&DummyTable](EntityId Id) { return Object{{"@", DummyTable[Id]}}; });
+
+  EXPECT_EQ(llvm::formatv("{0:2}", llvm::json::Value(std::move(JData))).str(),
+            SerilizationTestOracle);
+}
+
+TEST_F(UnsafeBufferUsageTest, UnsafeBufferUsageDeserializeTest) {
+  auto Sum = setUpTest(R"cpp(
+    void foo(int ***p, int ****q, int x) {
+      p[5][5][5];
+      q[5][5][5][5];
+    }
+  )cpp",
+                       "foo");
+  ASSERT_NE(Sum, nullptr);
+  EXPECT_EQ(*Sum, makeSet(__LINE__, {{"p", 1U},
+                                     {"p", 2U},
+                                     {"p", 3U},
+                                     {"q", 1U},
+                                     {"q", 2U},
+                                     {"q", 3U},
+                                     {"q", 4U}}));
+
+  using Object = llvm::json::Object;
+  using Value = llvm::json::Value;
+  std::map<uint64_t, EntityId> DummyTable{{42, *getEntityId("p")},
+                                          {108, *getEntityId("q")}};
+  Expected<Value> ParsedJSON = llvm::json::parse(SerilizationTestOracle);
+
+  ASSERT_THAT_EXPECTED(ParsedJSON, llvm::Succeeded());
+  ASSERT_NE(ParsedJSON->getAsObject(), nullptr);
+
+  EntityIdTable Ignored;
+  auto ParsedSum = UnsafeBufferUsageEntitySummary::jsonDeserializeFn(
+      *ParsedJSON->getAsObject(), Ignored,
+      [&DummyTable](const Object &O) -> Expected<EntityId> {
+        return DummyTable.at(O.getInteger("@").value());
----------------
steakhal wrote:

I think there is a tension between our mindsets about how, and to what degree, things should be tested.

My problem with JSON-specific unit tests is that, if we follow this pattern, every analysis would be expected to test the internal details of how the JSON format embeds its content.

This means that the number of tests is not `O(1)` (assuming a constant number of tests for the JSON format embedding), but instead scales with the number of analyses, i.e. `O(N)`.
Over time, this would essentially pin the implementation of the JSON format, because it would become harder and harder to migrate or change.
I think this is the wrong approach, and it breaks the orthogonality principle.

https://github.com/llvm/llvm-project/pull/187156