[llvm] 009f88f - [SystemZ][z/OS] TXT records in the GOFF reader (#74526)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 27 08:31:25 PDT 2024


Author: Yusra Syeda
Date: 2024-03-27T11:31:21-04:00
New Revision: 009f88fc0e3a036be97ef7b222b90af342bae0b7

URL: https://github.com/llvm/llvm-project/commit/009f88fc0e3a036be97ef7b222b90af342bae0b7
DIFF: https://github.com/llvm/llvm-project/commit/009f88fc0e3a036be97ef7b222b90af342bae0b7.diff

LOG: [SystemZ][z/OS] TXT records in the GOFF reader (#74526)

This PR adds handling for TXT records in the GOFF reader.

---------

Authored-by: Yusra Syeda <yusra.syeda at ibm.com>

Added: 
    

Modified: 
    llvm/include/llvm/Object/GOFF.h
    llvm/include/llvm/Object/GOFFObjectFile.h
    llvm/lib/Object/GOFFObjectFile.cpp
    llvm/unittests/Object/GOFFObjectFileTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Object/GOFF.h b/llvm/include/llvm/Object/GOFF.h
index 91762457ae0563..9fb8876e893d57 100644
--- a/llvm/include/llvm/Object/GOFF.h
+++ b/llvm/include/llvm/Object/GOFF.h
@@ -73,6 +73,26 @@ class Record {
   }
 };
 
+class TXTRecord : public Record {
+public:
+  /// \brief Maximum length of data; any more must go in continuation.
+  static const uint8_t TXTMaxDataLength = 56;
+  /// Collects the data bytes of the TXT record into \p CompleteData.
+  static Error getData(const uint8_t *Record, SmallString<256> &CompleteData);
+  /// Reads the 32-bit field at byte offset 4 of \p Record into \p EsdId.
+  static void getElementEsdId(const uint8_t *Record, uint32_t &EsdId) {
+    get<uint32_t>(Record, 4, EsdId);
+  }
+  /// Reads the 32-bit field at byte offset 12 of \p Record into \p Offset.
+  static void getOffset(const uint8_t *Record, uint32_t &Offset) {
+    get<uint32_t>(Record, 12, Offset);
+  }
+  /// Reads the 16-bit field at byte offset 22 of \p Record into \p Length.
+  static void getDataLength(const uint8_t *Record, uint16_t &Length) {
+    get<uint16_t>(Record, 22, Length);
+  }
+};
+
 class HDRRecord : public Record {
 public:
   static Error getData(const uint8_t *Record, SmallString<256> &CompleteData);

diff  --git a/llvm/include/llvm/Object/GOFFObjectFile.h b/llvm/include/llvm/Object/GOFFObjectFile.h
index 7e1ceb95f66723..6871641e97ec8d 100644
--- a/llvm/include/llvm/Object/GOFFObjectFile.h
+++ b/llvm/include/llvm/Object/GOFFObjectFile.h
@@ -29,7 +29,10 @@ namespace llvm {
 namespace object {
 
 class GOFFObjectFile : public ObjectFile {
+  friend class GOFFSymbolRef;
+
   IndexedMap<const uint8_t *> EsdPtrs; // Indexed by EsdId.
+  SmallVector<const uint8_t *, 256> TextPtrs;
 
   mutable DenseMap<uint32_t, std::pair<size_t, std::unique_ptr<char[]>>>
       EsdNamesCache;
@@ -38,7 +41,7 @@ class GOFFObjectFile : public ObjectFile {
   // (EDID, 0)               code, r/o data section
   // (EDID,PRID)             r/w data section
   SmallVector<SectionEntryImpl, 256> SectionList;
-  mutable DenseMap<uint32_t, std::string> SectionDataCache;
+  mutable DenseMap<uint32_t, SmallVector<uint8_t>> SectionDataCache;
 
 public:
   Expected<StringRef> getSymbolName(SymbolRef Symbol) const;
@@ -66,6 +69,10 @@ class GOFFObjectFile : public ObjectFile {
     return true;
   }
 
+  bool isSectionNoLoad(DataRefImpl Sec) const;
+  bool isSectionReadOnlyData(DataRefImpl Sec) const;
+  bool isSectionZeroInit(DataRefImpl Sec) const;
+
 private:
   // SymbolRef.
   Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
@@ -75,27 +82,24 @@ class GOFFObjectFile : public ObjectFile {
   Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override;
   Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
   Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
+  uint64_t getSymbolSize(DataRefImpl Symb) const;
 
   const uint8_t *getSymbolEsdRecord(DataRefImpl Symb) const;
   bool isSymbolUnresolved(DataRefImpl Symb) const;
   bool isSymbolIndirect(DataRefImpl Symb) const;
 
   // SectionRef.
-  void moveSectionNext(DataRefImpl &Sec) const override {}
-  virtual Expected<StringRef> getSectionName(DataRefImpl Sec) const override {
-    return StringRef();
-  }
-  uint64_t getSectionAddress(DataRefImpl Sec) const override { return 0; }
-  uint64_t getSectionSize(DataRefImpl Sec) const override { return 0; }
+  void moveSectionNext(DataRefImpl &Sec) const override;
+  virtual Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
+  uint64_t getSectionAddress(DataRefImpl Sec) const override;
+  uint64_t getSectionSize(DataRefImpl Sec) const override;
   virtual Expected<ArrayRef<uint8_t>>
-  getSectionContents(DataRefImpl Sec) const override {
-    return ArrayRef<uint8_t>();
-  }
-  uint64_t getSectionIndex(DataRefImpl Sec) const override { return 0; }
-  uint64_t getSectionAlignment(DataRefImpl Sec) const override { return 0; }
+  getSectionContents(DataRefImpl Sec) const override;
+  uint64_t getSectionIndex(DataRefImpl Sec) const override { return Sec.d.a; }
+  uint64_t getSectionAlignment(DataRefImpl Sec) const override;
   bool isSectionCompressed(DataRefImpl Sec) const override { return false; }
-  bool isSectionText(DataRefImpl Sec) const override { return false; }
-  bool isSectionData(DataRefImpl Sec) const override { return false; }
+  bool isSectionText(DataRefImpl Sec) const override;
+  bool isSectionData(DataRefImpl Sec) const override;
   bool isSectionBSS(DataRefImpl Sec) const override { return false; }
   bool isSectionVirtual(DataRefImpl Sec) const override { return false; }
   relocation_iterator section_rel_begin(DataRefImpl Sec) const override {
@@ -109,6 +113,7 @@ class GOFFObjectFile : public ObjectFile {
   const uint8_t *getSectionPrEsdRecord(DataRefImpl &Sec) const;
   const uint8_t *getSectionEdEsdRecord(uint32_t SectionIndex) const;
   const uint8_t *getSectionPrEsdRecord(uint32_t SectionIndex) const;
+  uint32_t getSectionDefEsdId(DataRefImpl &Sec) const;
 
   // RelocationRef.
   void moveRelocationNext(DataRefImpl &Rel) const override {}
@@ -122,6 +127,29 @@ class GOFFObjectFile : public ObjectFile {
                              SmallVectorImpl<char> &Result) const override {}
 };
 
+class GOFFSymbolRef : public SymbolRef {
+public:
+  GOFFSymbolRef(const SymbolRef &B) : SymbolRef(B) {
+    assert(isa<GOFFObjectFile>(SymbolRef::getObject()));
+  }
+  /// Returns the symbol's object file as a GOFFObjectFile.
+  const GOFFObjectFile *getObject() const {
+    return cast<GOFFObjectFile>(BasicSymbolRef::getObject());
+  }
+  /// Forwards to GOFFObjectFile::getSymbolFlags() for this symbol.
+  Expected<uint32_t> getSymbolGOFFFlags() const {
+    return getObject()->getSymbolFlags(getRawDataRefImpl());
+  }
+  /// Forwards to GOFFObjectFile::getSymbolType() for this symbol.
+  Expected<SymbolRef::Type> getSymbolGOFFType() const {
+    return getObject()->getSymbolType(getRawDataRefImpl());
+  }
+  /// Forwards to GOFFObjectFile::getSymbolSize() for this symbol.
+  uint64_t getSize() const {
+    return getObject()->getSymbolSize(getRawDataRefImpl());
+  }
+};
+
 } // namespace object
 
 } // namespace llvm

diff  --git a/llvm/lib/Object/GOFFObjectFile.cpp b/llvm/lib/Object/GOFFObjectFile.cpp
index 76a13559ebfe35..6b48d464dc3ec7 100644
--- a/llvm/lib/Object/GOFFObjectFile.cpp
+++ b/llvm/lib/Object/GOFFObjectFile.cpp
@@ -168,6 +168,11 @@ GOFFObjectFile::GOFFObjectFile(MemoryBufferRef Object, Error &Err)
       LLVM_DEBUG(dbgs() << "  --  ESD " << EsdId << "\n");
       break;
     }
+    case GOFF::RT_TXT:
+      // Save TXT records.
+      TextPtrs.emplace_back(I);
+      LLVM_DEBUG(dbgs() << "  --  TXT\n");
+      break;
     case GOFF::RT_END:
       LLVM_DEBUG(dbgs() << "  --  END (GOFF record type) unhandled\n");
       break;
@@ -364,6 +369,13 @@ GOFFObjectFile::getSymbolSection(DataRefImpl Symb) const {
                                std::to_string(SymEdId));
 }
 
+uint64_t GOFFObjectFile::getSymbolSize(DataRefImpl Symb) const {
+  // A symbol's size is the length field stored in its ESD record.
+  uint32_t EsdLength;
+  ESDRecord::getLength(getSymbolEsdRecord(Symb), EsdLength);
+  return EsdLength;
+}
+
 const uint8_t *GOFFObjectFile::getSectionEdEsdRecord(DataRefImpl &Sec) const {
   SectionEntryImpl EsdIds = SectionList[Sec.d.a];
   const uint8_t *EsdRecord = EsdPtrs[EsdIds.d.a];
@@ -394,6 +406,154 @@ GOFFObjectFile::getSectionPrEsdRecord(uint32_t SectionIndex) const {
   return EsdRecord;
 }
 
+uint32_t GOFFObjectFile::getSectionDefEsdId(DataRefImpl &Sec) const {
+  // Default to the section's ED record; if that record has length zero and
+  // the section also has a PR record, the PR record is used instead.
+  const uint8_t *DefRecord = getSectionEdEsdRecord(Sec);
+  uint32_t EdLength;
+  ESDRecord::getLength(DefRecord, EdLength);
+  if (EdLength == 0)
+    if (const uint8_t *PrRecord = getSectionPrEsdRecord(Sec))
+      DefRecord = PrRecord;
+
+  uint32_t Id;
+  ESDRecord::getEsdId(DefRecord, Id);
+  LLVM_DEBUG(dbgs() << "Got def EsdId: " << Id << '\n');
+  return Id;
+}
+
+void GOFFObjectFile::moveSectionNext(DataRefImpl &Sec) const {
+  // Advance to the following SectionList slot; running off the end wraps the
+  // index back to 0.
+  Sec.d.a = (Sec.d.a + 1 >= SectionList.size()) ? 0 : Sec.d.a + 1;
+}
+
+Expected<StringRef> GOFFObjectFile::getSectionName(DataRefImpl Sec) const {
+  // The section name is the name of its ED symbol, so look that symbol up
+  // through the ED ESD id recorded in SectionList.
+  DataRefImpl EdSym;
+  EdSym.d.a = SectionList[Sec.d.a].d.a;
+  Expected<StringRef> NameOrErr = getSymbolName(EdSym);
+  if (!NameOrErr)
+    return NameOrErr;
+
+  LLVM_DEBUG(dbgs() << "Got section: " << *NameOrErr << '\n');
+  LLVM_DEBUG(dbgs() << "Final section name: " << *NameOrErr << '\n');
+  return NameOrErr;
+}
+
+uint64_t GOFFObjectFile::getSectionAddress(DataRefImpl Sec) const {
+  // The address reported is the offset field of the section's ED record.
+  uint32_t EdOffset;
+  ESDRecord::getOffset(getSectionEdEsdRecord(Sec), EdOffset);
+  return EdOffset;
+}
+
+uint64_t GOFFObjectFile::getSectionSize(DataRefImpl Sec) const {
+  // The size is the length field of the section's defining ESD record.
+  const uint8_t *DefRecord = EsdPtrs[getSectionDefEsdId(Sec)];
+  uint32_t Size;
+  ESDRecord::getLength(DefRecord, Size);
+  LLVM_DEBUG(dbgs() << "Got section size: " << Size << '\n');
+  return Size;
+}
+
+// Unravel TXT records and expand fill characters to produce a contiguous
+// sequence of bytes; the returned ArrayRef points into SectionDataCache.
+Expected<ArrayRef<uint8_t>>
+GOFFObjectFile::getSectionContents(DataRefImpl Sec) const {
+  auto Cached = SectionDataCache.find(Sec.d.a);
+  if (Cached != SectionDataCache.end())
+    return ArrayRef<uint8_t>(Cached->second);
+  uint64_t SectionSize = getSectionSize(Sec);
+  uint32_t DefEsdId = getSectionDefEsdId(Sec);
+
+  const uint8_t *EdEsdRecord = getSectionEdEsdRecord(Sec);
+  bool FillBytePresent;
+  ESDRecord::getFillBytePresent(EdEsdRecord, FillBytePresent);
+  uint8_t FillByte = '\0';
+  if (FillBytePresent)
+    ESDRecord::getFillByteValue(EdEsdRecord, FillByte);
+
+  // Initialize section with fill byte.
+  SmallVector<uint8_t> Data(SectionSize, FillByte);
+
+  // Replace section with content from text records.
+  for (const uint8_t *TxtRecordPtr : TextPtrs) {
+    uint32_t TxtEsdId;
+    TXTRecord::getElementEsdId(TxtRecordPtr, TxtEsdId);
+    LLVM_DEBUG(dbgs() << "Got txt EsdId: " << TxtEsdId << '\n');
+    if (TxtEsdId != DefEsdId)
+      continue;
+
+    uint32_t TxtDataOffset;
+    TXTRecord::getOffset(TxtRecordPtr, TxtDataOffset);
+    uint16_t TxtDataSize;
+    TXTRecord::getDataLength(TxtRecordPtr, TxtDataSize);
+    LLVM_DEBUG(dbgs() << "Record offset " << TxtDataOffset << ", data size "
+                      << TxtDataSize << "\n");
+
+    SmallString<256> CompleteData;
+    CompleteData.reserve(TxtDataSize);
+    if (Error Err = TXTRecord::getData(TxtRecordPtr, CompleteData))
+      return std::move(Err);
+    // Reject records whose data would land outside the section buffer.
+    if (CompleteData.size() != TxtDataSize || TxtDataOffset > Data.size() ||
+        TxtDataSize > Data.size() - TxtDataOffset)
+      return createStringError(object_error::parse_failed,
+                               "TXT record data length or offset is invalid");
+    std::copy(CompleteData.begin(), CompleteData.end(),
+              Data.begin() + TxtDataOffset);
+  }
+  // Cache the bytes and return a view of the cached copy, not of local Data.
+  auto &Stored = (SectionDataCache[Sec.d.a] = std::move(Data));
+  return ArrayRef<uint8_t>(Stored);
+}
+
+uint64_t GOFFObjectFile::getSectionAlignment(DataRefImpl Sec) const {
+  GOFF::ESDAlignment Pow2Alignment;
+  ESDRecord::getAlignment(getSectionEdEsdRecord(Sec), Pow2Alignment);
+  // Shift a 64-bit one: with a plain `1` the shift would be done in int.
+  return uint64_t{1} << static_cast<uint64_t>(Pow2Alignment);
+}
+
+bool GOFFObjectFile::isSectionText(DataRefImpl Sec) const {
+  // Text sections are those whose ED record carries ESD_EXE_CODE.
+  GOFF::ESDExecutable ExecKind;
+  ESDRecord::getExecutable(getSectionEdEsdRecord(Sec), ExecKind);
+  return ExecKind == GOFF::ESD_EXE_CODE;
+}
+
+bool GOFFObjectFile::isSectionData(DataRefImpl Sec) const {
+  // Data sections are those whose ED record carries ESD_EXE_DATA.
+  GOFF::ESDExecutable ExecKind;
+  ESDRecord::getExecutable(getSectionEdEsdRecord(Sec), ExecKind);
+  return ExecKind == GOFF::ESD_EXE_DATA;
+}
+
+bool GOFFObjectFile::isSectionNoLoad(DataRefImpl Sec) const {
+  // No-load sections have ESD_LB_NoLoad as their ED loading behavior.
+  GOFF::ESDLoadingBehavior Behavior;
+  ESDRecord::getLoadingBehavior(getSectionEdEsdRecord(Sec), Behavior);
+  return Behavior == GOFF::ESD_LB_NoLoad;
+}
+
+bool GOFFObjectFile::isSectionReadOnlyData(DataRefImpl Sec) const {
+  // Read-only data: a data section whose loading behavior is ESD_LB_Initial.
+  if (!isSectionData(Sec))
+    return false;
+
+  GOFF::ESDLoadingBehavior Behavior;
+  ESDRecord::getLoadingBehavior(getSectionEdEsdRecord(Sec), Behavior);
+  return Behavior == GOFF::ESD_LB_Initial;
+}
+
+bool GOFFObjectFile::isSectionZeroInit(DataRefImpl Sec) const {
+  // GOFF uses fill characters, and getSectionContents() already expands
+  // them into the returned bytes - so no section is reported as zero-init.
+  return false;
+}
+
 section_iterator GOFFObjectFile::section_begin() const {
   DataRefImpl Sec;
   moveSectionNext(Sec);
@@ -476,6 +636,13 @@ Error ESDRecord::getData(const uint8_t *Record,
   return getContinuousData(Record, DataSize, 72, CompleteData);
 }
 
+Error TXTRecord::getData(const uint8_t *Record,
+                         SmallString<256> &CompleteData) {
+  uint16_t DataSize; // Read from the record's data length field.
+  getDataLength(Record, DataSize);
+  return getContinuousData(Record, DataSize, 24, CompleteData);
+}
+
 Error ENDRecord::getData(const uint8_t *Record,
                          SmallString<256> &CompleteData) {
   uint16_t Length = getNameLength(Record);

diff  --git a/llvm/unittests/Object/GOFFObjectFileTest.cpp b/llvm/unittests/Object/GOFFObjectFileTest.cpp
index 734dac6b8507a7..69f60d016a8081 100644
--- a/llvm/unittests/Object/GOFFObjectFileTest.cpp
+++ b/llvm/unittests/Object/GOFFObjectFileTest.cpp
@@ -502,3 +502,100 @@ TEST(GOFFObjectFileTest, InvalidERSymbolType) {
         FailedWithMessage("ESD record 1 has unknown Executable type 0x03"));
   }
 }
+
+TEST(GOFFObjectFileTest, TXTConstruct) {
+  char GOFFData[GOFF::RecordLength * 6] = {};
+
+  // HDR record.
+  GOFFData[0] = 0x03;
+  GOFFData[1] = 0xF0;
+  GOFFData[50] = 0x01;
+
+  // ESD record.
+  GOFFData[GOFF::RecordLength] = 0x03;
+  GOFFData[GOFF::RecordLength + 7] = 0x01;  // ESDID.
+  GOFFData[GOFF::RecordLength + 71] = 0x05; // Size of symbol name.
+  GOFFData[GOFF::RecordLength + 72] = 0xa5; // Symbol name is v.
+  GOFFData[GOFF::RecordLength + 73] = 0x81; // Symbol name is a.
+  GOFFData[GOFF::RecordLength + 74] = 0x99; // Symbol name is r.
+  GOFFData[GOFF::RecordLength + 75] = 0x7b; // Symbol name is #.
+  GOFFData[GOFF::RecordLength + 76] = 0x83; // Symbol name is c.
+
+  // ESD record.
+  GOFFData[GOFF::RecordLength * 2] = 0x03;
+  GOFFData[GOFF::RecordLength * 2 + 3] = 0x01;
+  GOFFData[GOFF::RecordLength * 2 + 7] = 0x02;  // ESDID.
+  GOFFData[GOFF::RecordLength * 2 + 11] = 0x01; // Parent ESDID.
+  GOFFData[GOFF::RecordLength * 2 + 27] = 0x08; // Length.
+  GOFFData[GOFF::RecordLength * 2 + 40] = 0x01; // Name Space ID.
+  GOFFData[GOFF::RecordLength * 2 + 41] = 0x80;
+  GOFFData[GOFF::RecordLength * 2 + 60] = 0x04; // Attributes - TODO confirm.
+  GOFFData[GOFF::RecordLength * 2 + 61] = 0x04; // Attributes - TODO confirm.
+  GOFFData[GOFF::RecordLength * 2 + 63] = 0x0a; // Attributes - TODO confirm.
+  GOFFData[GOFF::RecordLength * 2 + 66] = 0x03; // Attributes - TODO confirm.
+  GOFFData[GOFF::RecordLength * 2 + 71] = 0x08; // Size of symbol name.
+  GOFFData[GOFF::RecordLength * 2 + 72] = 0xc3; // Symbol name is C.
+  GOFFData[GOFF::RecordLength * 2 + 73] = 0x6d; // Symbol name is _.
+  GOFFData[GOFF::RecordLength * 2 + 74] = 0xc3; // Symbol name is C.
+  GOFFData[GOFF::RecordLength * 2 + 75] = 0xd6; // Symbol name is O.
+  GOFFData[GOFF::RecordLength * 2 + 76] = 0xc4; // Symbol name is D.
+  GOFFData[GOFF::RecordLength * 2 + 77] = 0xc5; // Symbol name is E.
+  GOFFData[GOFF::RecordLength * 2 + 78] = 0xf6; // Symbol name is 6.
+  GOFFData[GOFF::RecordLength * 2 + 79] = 0xf4; // Symbol name is 4.
+
+  // ESD record.
+  GOFFData[GOFF::RecordLength * 3] = 0x03;
+  GOFFData[GOFF::RecordLength * 3 + 3] = 0x02;
+  GOFFData[GOFF::RecordLength * 3 + 7] = 0x03;  // ESDID.
+  GOFFData[GOFF::RecordLength * 3 + 11] = 0x02; // Parent ESDID.
+  GOFFData[GOFF::RecordLength * 3 + 71] = 0x05; // Size of symbol name.
+  GOFFData[GOFF::RecordLength * 3 + 72] = 0xa5; // Symbol name is v.
+  GOFFData[GOFF::RecordLength * 3 + 73] = 0x81; // Symbol name is a.
+  GOFFData[GOFF::RecordLength * 3 + 74] = 0x99; // Symbol name is r.
+  GOFFData[GOFF::RecordLength * 3 + 75] = 0x7b; // Symbol name is #.
+  GOFFData[GOFF::RecordLength * 3 + 76] = 0x83; // Symbol name is c.
+
+  // TXT record.
+  GOFFData[GOFF::RecordLength * 4] = 0x03;
+  GOFFData[GOFF::RecordLength * 4 + 1] = 0x10;
+  GOFFData[GOFF::RecordLength * 4 + 7] = 0x02;
+  GOFFData[GOFF::RecordLength * 4 + 23] = 0x08; // Data Length.
+  GOFFData[GOFF::RecordLength * 4 + 24] = 0x12;
+  GOFFData[GOFF::RecordLength * 4 + 25] = 0x34;
+  GOFFData[GOFF::RecordLength * 4 + 26] = 0x56;
+  GOFFData[GOFF::RecordLength * 4 + 27] = 0x78;
+  GOFFData[GOFF::RecordLength * 4 + 28] = 0x9a;
+  GOFFData[GOFF::RecordLength * 4 + 29] = 0xbc;
+  GOFFData[GOFF::RecordLength * 4 + 30] = 0xde;
+  GOFFData[GOFF::RecordLength * 4 + 31] = 0xf0;
+
+  // END record.
+  GOFFData[GOFF::RecordLength * 5] = 0x03;
+  GOFFData[GOFF::RecordLength * 5 + 1] = 0x40;
+  GOFFData[GOFF::RecordLength * 5 + 11] = 0x06;
+
+  StringRef Data(GOFFData, GOFF::RecordLength * 6);
+
+  Expected<std::unique_ptr<ObjectFile>> GOFFObjOrErr =
+      object::ObjectFile::createGOFFObjectFile(
+          MemoryBufferRef(Data, "dummyGOFF"));
+
+  ASSERT_THAT_EXPECTED(GOFFObjOrErr, Succeeded());
+
+  GOFFObjectFile *GOFFObj = cast<GOFFObjectFile>((*GOFFObjOrErr).get());
+  auto Symbols = GOFFObj->symbols();
+  ASSERT_EQ(std::distance(Symbols.begin(), Symbols.end()), 1);
+  SymbolRef Symbol = *Symbols.begin();
+  Expected<StringRef> SymbolNameOrErr = GOFFObj->getSymbolName(Symbol);
+  ASSERT_THAT_EXPECTED(SymbolNameOrErr, Succeeded());
+  StringRef SymbolName = SymbolNameOrErr.get();
+  EXPECT_EQ(SymbolName, "var#c");
+  // The single section must hand back the TXT record's eight data bytes.
+  auto Sections = GOFFObj->sections();
+  ASSERT_EQ(std::distance(Sections.begin(), Sections.end()), 1);
+  SectionRef Section = *Sections.begin();
+  Expected<StringRef> SectionContent = Section.getContents();
+  ASSERT_THAT_EXPECTED(SectionContent, Succeeded());
+  StringRef Contents = SectionContent.get();
+  EXPECT_EQ(Contents, "\x12\x34\x56\x78\x9a\xbc\xde\xf0");
+}


        


More information about the llvm-commits mailing list