[llvm] [TextAPI] Consolidate TextAPI Reader/Writer APIs. (PR #66108)

Cyndy Ishida via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 14 08:47:27 PDT 2023


https://github.com/cyndyishida updated https://github.com/llvm/llvm-project/pull/66108:

>From cc74b88aa5f39ff50e9de87b6723b9951c1e9187 Mon Sep 17 00:00:00 2001
From: Cyndy Ishida <cyndy_ishida at apple.com>
Date: Tue, 12 Sep 2023 09:33:29 -0700
Subject: [PATCH] [TextAPI] Consolidate TextAPI Reader/Writer APIs.

Both Swift & LLD use TextAPI reader/writer apis to interface with
TBD files. Add doc strings to document what each API does.
Also, add shortcut APIs for validating input is a TBD
file.

This reduces the differences between downstream and how
tapi calls into these APIs.
---
 llvm/include/llvm/Object/TapiFile.h        | 10 ++--
 llvm/include/llvm/TextAPI/TextAPIReader.h  | 14 +++++
 llvm/include/llvm/TextAPI/TextAPIWriter.h  | 12 +++-
 llvm/lib/Object/TapiFile.cpp               |  3 +-
 llvm/lib/TextAPI/TextStub.cpp              | 66 +++++++++++++---------
 llvm/lib/TextAPI/TextStubCommon.h          |  2 +-
 llvm/lib/TextAPI/TextStubV5.cpp            |  8 +--
 llvm/tools/llvm-nm/llvm-nm.cpp             |  6 +-
 llvm/unittests/TextAPI/TextStubV5Tests.cpp | 11 +++-
 9 files changed, 87 insertions(+), 45 deletions(-)

diff --git a/llvm/include/llvm/Object/TapiFile.h b/llvm/include/llvm/Object/TapiFile.h
index 53889a3125cb1ac..c1de6608bb624c9 100644
--- a/llvm/include/llvm/Object/TapiFile.h
+++ b/llvm/include/llvm/Object/TapiFile.h
@@ -20,17 +20,12 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBufferRef.h"
 #include "llvm/TextAPI/Architecture.h"
+#include "llvm/TextAPI/InterfaceFile.h"
 
 namespace llvm {
 
 class raw_ostream;
 
-namespace MachO {
-
-class InterfaceFile;
-
-}
-
 namespace object {
 
 class TapiFile : public SymbolicFile {
@@ -51,6 +46,8 @@ class TapiFile : public SymbolicFile {
 
   Expected<SymbolRef::Type> getSymbolType(DataRefImpl DRI) const;
 
+  bool hasSegmentInfo() { return FileKind >= MachO::FileType::TBD_V5; }
+
   static bool classof(const Binary *v) { return v->isTapiFile(); }
 
   bool is64Bit() const override { return MachO::is64Bit(Arch); }
@@ -69,6 +66,7 @@ class TapiFile : public SymbolicFile {
 
   std::vector<Symbol> Symbols;
   MachO::Architecture Arch;
+  MachO::FileType FileKind;
 };
 
 } // end namespace object.
diff --git a/llvm/include/llvm/TextAPI/TextAPIReader.h b/llvm/include/llvm/TextAPI/TextAPIReader.h
index 389335312a74ed5..32af0e3601f1879 100644
--- a/llvm/include/llvm/TextAPI/TextAPIReader.h
+++ b/llvm/include/llvm/TextAPI/TextAPIReader.h
@@ -18,9 +18,23 @@ class MemoryBufferRef;
 namespace MachO {
 
 class InterfaceFile;
+enum FileType : unsigned;
 
 class TextAPIReader {
 public:
+  ///  Determine whether input can be interpreted as TAPI text file.
+  ///  This allows one to exit early when file is not recognized as TAPI file
+  ///  as opposed to `get` which attempts to full parse and load of library
+  ///  attributes.
+  ///
+  /// \param InputBuffer Buffer holding contents of TAPI text file.
+  /// \return The file format version of TAPI text file.
+  static Expected<FileType> canRead(MemoryBufferRef InputBuffer);
+
+  /// Parse and get an InterfaceFile that represents the full
+  /// library.
+  ///
+  /// \param InputBuffer Buffer holding contents of TAPI text file.
   static Expected<std::unique_ptr<InterfaceFile>>
   get(MemoryBufferRef InputBuffer);
 
diff --git a/llvm/include/llvm/TextAPI/TextAPIWriter.h b/llvm/include/llvm/TextAPI/TextAPIWriter.h
index 9bdaaf58d09f31e..89fc984854dbae0 100644
--- a/llvm/include/llvm/TextAPI/TextAPIWriter.h
+++ b/llvm/include/llvm/TextAPI/TextAPIWriter.h
@@ -9,6 +9,8 @@
 #ifndef LLVM_TEXTAPI_TEXTAPIWRITER_H
 #define LLVM_TEXTAPI_TEXTAPIWRITER_H
 
+#include "llvm/TextAPI/InterfaceFile.h"
+
 namespace llvm {
 
 class Error;
@@ -16,13 +18,19 @@ class raw_ostream;
 
 namespace MachO {
 
-class InterfaceFile;
-
 class TextAPIWriter {
 public:
   TextAPIWriter() = delete;
 
+  /// Write TAPI text file contents into stream.
+  ///
+  /// \param OS Stream to write to.
+  /// \param File Library attributes to write as text file.
+  /// \param FileKind File format to write text file as. If not specified, it
+  /// will read from File.
+  /// \param Compact Whether to limit whitespace in text file.
   static Error writeToStream(raw_ostream &OS, const InterfaceFile &File,
+                             const FileType FileKind = FileType::Invalid,
                              bool Compact = false);
 };
 
diff --git a/llvm/lib/Object/TapiFile.cpp b/llvm/lib/Object/TapiFile.cpp
index b5f4d277bbfe19b..fcf61541941ea3e 100644
--- a/llvm/lib/Object/TapiFile.cpp
+++ b/llvm/lib/Object/TapiFile.cpp
@@ -49,7 +49,8 @@ static SymbolRef::Type getType(const Symbol *Sym) {
 
 TapiFile::TapiFile(MemoryBufferRef Source, const InterfaceFile &Interface,
                    Architecture Arch)
-    : SymbolicFile(ID_TapiFile, Source), Arch(Arch) {
+    : SymbolicFile(ID_TapiFile, Source), Arch(Arch),
+      FileKind(Interface.getFileType()) {
   for (const auto *Symbol : Interface.symbols()) {
     if (!Symbol->getArchitectures().has(Arch))
       continue;
diff --git a/llvm/lib/TextAPI/TextStub.cpp b/llvm/lib/TextAPI/TextStub.cpp
index b9b6061101c6b88..387482296b6f35a 100644
--- a/llvm/lib/TextAPI/TextStub.cpp
+++ b/llvm/lib/TextAPI/TextStub.cpp
@@ -620,6 +620,11 @@ template <> struct MappingTraits<const InterfaceFile *> {
             !(Flags & TBDFlags::NotApplicationExtensionSafe));
       }
 
+      // For older file formats, the segment where the symbol
+      // comes from is unknown, treat all symbols as Data
+      // in these cases.
+      const auto Flags = SymbolFlags::Data;
+
       for (const auto &Section : Exports) {
         const auto Targets =
             synthesizeTargets(Section.Architectures, Platforms);
@@ -634,26 +639,27 @@ template <> struct MappingTraits<const InterfaceFile *> {
 
         for (const auto &Symbol : Section.Symbols) {
           if (Ctx->FileKind != FileType::TBD_V3 &&
-              Symbol.value.startswith("_OBJC_EHTYPE_$_"))
+              Symbol.value.startswith(ObjC2EHTypePrefix))
             File->addSymbol(SymbolKind::ObjectiveCClassEHType,
-                            Symbol.value.drop_front(15), Targets);
+                            Symbol.value.drop_front(15), Targets, Flags);
           else
-            File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets);
+            File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets, Flags);
         }
         for (auto &Symbol : Section.Classes) {
           auto Name = Symbol.value;
           if (Ctx->FileKind != FileType::TBD_V3)
             Name = Name.drop_front();
-          File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets);
+          File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets, Flags);
         }
         for (auto &Symbol : Section.ClassEHs)
-          File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets);
+          File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets,
+                          Flags);
         for (auto &Symbol : Section.IVars) {
           auto Name = Symbol.value;
           if (Ctx->FileKind != FileType::TBD_V3)
             Name = Name.drop_front();
-          File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name,
-                          Targets);
+          File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name, Targets,
+                          Flags);
         }
         for (auto &Symbol : Section.WeakDefSymbols)
           File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets,
@@ -668,34 +674,35 @@ template <> struct MappingTraits<const InterfaceFile *> {
             synthesizeTargets(Section.Architectures, Platforms);
         for (auto &Symbol : Section.Symbols) {
           if (Ctx->FileKind != FileType::TBD_V3 &&
-              Symbol.value.startswith("_OBJC_EHTYPE_$_"))
+              Symbol.value.startswith(ObjC2EHTypePrefix))
             File->addSymbol(SymbolKind::ObjectiveCClassEHType,
                             Symbol.value.drop_front(15), Targets,
-                            SymbolFlags::Undefined);
+                            SymbolFlags::Undefined | Flags);
           else
             File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets,
-                            SymbolFlags::Undefined);
+                            SymbolFlags::Undefined | Flags);
         }
         for (auto &Symbol : Section.Classes) {
           auto Name = Symbol.value;
           if (Ctx->FileKind != FileType::TBD_V3)
             Name = Name.drop_front();
           File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets,
-                          SymbolFlags::Undefined);
+                          SymbolFlags::Undefined | Flags);
         }
         for (auto &Symbol : Section.ClassEHs)
           File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets,
-                          SymbolFlags::Undefined);
+                          SymbolFlags::Undefined | Flags);
         for (auto &Symbol : Section.IVars) {
           auto Name = Symbol.value;
           if (Ctx->FileKind != FileType::TBD_V3)
             Name = Name.drop_front();
           File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name, Targets,
-                          SymbolFlags::Undefined);
+                          SymbolFlags::Undefined | Flags);
         }
         for (auto &Symbol : Section.WeakRefSymbols)
           File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets,
-                          SymbolFlags::Undefined | SymbolFlags::WeakReferenced);
+                          SymbolFlags::Undefined | SymbolFlags::WeakReferenced |
+                              Flags);
       }
 
       return File;
@@ -906,7 +913,12 @@ template <> struct MappingTraits<const InterfaceFile *> {
       }
 
       auto handleSymbols = [File](const SectionList &CurrentSections,
-                                  SymbolFlags Flag = SymbolFlags::None) {
+                                  SymbolFlags InputFlag = SymbolFlags::None) {
+        // For older file formats, the segment where the symbol
+        // comes from is unknown, treat all symbols as Data
+        // in these cases.
+        const SymbolFlags Flag = InputFlag | SymbolFlags::Data;
+
         for (const auto &CurrentSection : CurrentSections) {
           for (auto &sym : CurrentSection.Symbols)
             File->addSymbol(SymbolKind::GlobalSymbol, sym,
@@ -924,9 +936,10 @@ template <> struct MappingTraits<const InterfaceFile *> {
             File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, sym,
                             CurrentSection.Targets, Flag);
 
-          SymbolFlags SymFlag = (Flag == SymbolFlags::Undefined)
-                                    ? SymbolFlags::WeakReferenced
-                                    : SymbolFlags::WeakDefined;
+          SymbolFlags SymFlag =
+              ((Flag & SymbolFlags::Undefined) == SymbolFlags::Undefined)
+                  ? SymbolFlags::WeakReferenced
+                  : SymbolFlags::WeakDefined;
           for (auto &sym : CurrentSection.WeakSymbols) {
             File->addSymbol(SymbolKind::GlobalSymbol, sym,
                             CurrentSection.Targets, Flag | SymFlag);
@@ -1078,9 +1091,7 @@ static void DiagHandler(const SMDiagnostic &Diag, void *Context) {
   File->ErrorMessage = ("malformed file\n" + Message).str();
 }
 
-namespace {
-
-Expected<FileType> canReadFileType(MemoryBufferRef InputBuffer) {
+Expected<FileType> TextAPIReader::canRead(MemoryBufferRef InputBuffer) {
   auto TAPIFile = InputBuffer.getBuffer().trim();
   if (TAPIFile.startswith("{") && TAPIFile.endswith("}"))
     return FileType::TBD_V5;
@@ -1103,13 +1114,12 @@ Expected<FileType> canReadFileType(MemoryBufferRef InputBuffer) {
 
   return createStringError(std::errc::not_supported, "unsupported file type");
 }
-} // namespace
 
 Expected<std::unique_ptr<InterfaceFile>>
 TextAPIReader::get(MemoryBufferRef InputBuffer) {
   TextAPIContext Ctx;
   Ctx.Path = std::string(InputBuffer.getBufferIdentifier());
-  if (auto FTOrErr = canReadFileType(InputBuffer))
+  if (auto FTOrErr = canRead(InputBuffer))
     Ctx.FileKind = *FTOrErr;
   else
     return FTOrErr.takeError();
@@ -1145,14 +1155,18 @@ TextAPIReader::get(MemoryBufferRef InputBuffer) {
 }
 
 Error TextAPIWriter::writeToStream(raw_ostream &OS, const InterfaceFile &File,
-                                   bool Compact) {
+                                   const FileType FileKind, bool Compact) {
   TextAPIContext Ctx;
   Ctx.Path = std::string(File.getPath());
-  Ctx.FileKind = File.getFileType();
+
+  // Prefer parameter for format if passed, otherwise fallback to the File
+  // FileType.
+  Ctx.FileKind =
+      (FileKind == FileType::Invalid) ? File.getFileType() : FileKind;
 
   // Write out in JSON format.
   if (Ctx.FileKind >= FileType::TBD_V5) {
-    return serializeInterfaceFileToJSON(OS, File, Compact);
+    return serializeInterfaceFileToJSON(OS, File, Ctx.FileKind, Compact);
   }
 
   llvm::yaml::Output YAMLOut(OS, &Ctx, /*WrapColumn=*/80);
diff --git a/llvm/lib/TextAPI/TextStubCommon.h b/llvm/lib/TextAPI/TextStubCommon.h
index 385c2ceab51ae3b..5faba09fa1bbff2 100644
--- a/llvm/lib/TextAPI/TextStubCommon.h
+++ b/llvm/lib/TextAPI/TextStubCommon.h
@@ -48,7 +48,7 @@ Expected<std::unique_ptr<InterfaceFile>>
 getInterfaceFileFromJSON(StringRef JSON);
 
 Error serializeInterfaceFileToJSON(raw_ostream &OS, const InterfaceFile &File,
-                                   bool Compact);
+                                   const FileType FileKind, bool Compact);
 } // namespace MachO
 
 namespace yaml {
diff --git a/llvm/lib/TextAPI/TextStubV5.cpp b/llvm/lib/TextAPI/TextStubV5.cpp
index 31d7dffad306aad..f6a3fef088e4641 100644
--- a/llvm/lib/TextAPI/TextStubV5.cpp
+++ b/llvm/lib/TextAPI/TextStubV5.cpp
@@ -986,9 +986,8 @@ Expected<Object> serializeIF(const InterfaceFile *File) {
   return std::move(Library);
 }
 
-Expected<Object> getJSON(const InterfaceFile *File) {
-  assert(File->getFileType() == FileType::TBD_V5 &&
-         "unexpected json file format version");
+Expected<Object> getJSON(const InterfaceFile *File, const FileType FileKind) {
+  assert(FileKind == FileType::TBD_V5 && "unexpected json file format version");
   Object Root;
 
   auto MainLibOrErr = serializeIF(File);
@@ -1012,8 +1011,9 @@ Expected<Object> getJSON(const InterfaceFile *File) {
 
 Error MachO::serializeInterfaceFileToJSON(raw_ostream &OS,
                                           const InterfaceFile &File,
+                                          const FileType FileKind,
                                           bool Compact) {
-  auto TextFile = getJSON(&File);
+  auto TextFile = getJSON(&File, FileKind);
   if (!TextFile)
     return TextFile.takeError();
   if (Compact)
diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index 8ac7eb2a825b57e..9a9e8bd146bb659 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -1023,10 +1023,12 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) {
 static char getSymbolNMTypeChar(TapiFile &Obj, basic_symbol_iterator I) {
   auto Type = cantFail(Obj.getSymbolType(I->getRawDataRefImpl()));
   switch (Type) {
-  case SymbolRef::ST_Data:
-    return 'd';
   case SymbolRef::ST_Function:
     return 't';
+  case SymbolRef::ST_Data:
+    if (Obj.hasSegmentInfo())
+      return 'd';
+    [[fallthrough]];
   default:
     return 's';
   }
diff --git a/llvm/unittests/TextAPI/TextStubV5Tests.cpp b/llvm/unittests/TextAPI/TextStubV5Tests.cpp
index 54ca21b91c4d568..60976a5f0648186 100644
--- a/llvm/unittests/TextAPI/TextStubV5Tests.cpp
+++ b/llvm/unittests/TextAPI/TextStubV5Tests.cpp
@@ -185,11 +185,15 @@ TEST(TBDv5, ReadFile) {
 "libraries": []
 })";
 
-  Expected<TBDFile> Result =
-      TextAPIReader::get(MemoryBufferRef(TBDv5File, "Test.tbd"));
+  MemoryBufferRef InputBuf = MemoryBufferRef(TBDv5File, "Test.tbd");
+  Expected<FileType> ExpectedFT = TextAPIReader::canRead(InputBuf);
+  EXPECT_TRUE(!!ExpectedFT);
+
+  Expected<TBDFile> Result = TextAPIReader::get(InputBuf);
   EXPECT_TRUE(!!Result);
   TBDFile File = std::move(Result.get());
   EXPECT_EQ(FileType::TBD_V5, File->getFileType());
+  EXPECT_EQ(*ExpectedFT, File->getFileType());
   EXPECT_EQ(std::string("/S/L/F/Foo.framework/Foo"), File->getInstallName());
 
   TargetList AllTargets = {
@@ -915,7 +919,8 @@ TEST(TBDv5, WriteMultipleDocuments) {
   // against TBDv5File.
   SmallString<4096> Buffer;
   raw_svector_ostream OS(Buffer);
-  Error Result = TextAPIWriter::writeToStream(OS, File, /*Compact=*/true);
+  Error Result = TextAPIWriter::writeToStream(OS, File, FileType::Invalid,
+                                              /*Compact=*/true);
   EXPECT_FALSE(Result);
 
   Expected<TBDFile> Input =



More information about the llvm-commits mailing list