[llvm] 0a146a9 - [AIX] asm output: use character literals in byte lists for strings

Hubert Tong via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 29 18:14:49 PDT 2020


Author: Hubert Tong
Date: 2020-09-29T21:14:41-04:00
New Revision: 0a146a9d0bdd54411f0b0712e27481a4c280ae03

URL: https://github.com/llvm/llvm-project/commit/0a146a9d0bdd54411f0b0712e27481a4c280ae03
DIFF: https://github.com/llvm/llvm-project/commit/0a146a9d0bdd54411f0b0712e27481a4c280ae03.diff

LOG: [AIX] asm output: use character literals in byte lists for strings

This patch improves the assembly output produced for string literals by
using character literals in byte lists. This provides the benefits of
having printable characters appear as such in the assembly output and of
having strings kept as logical units on the same line.

Reviewed By: daltenty

Differential Revision: https://reviews.llvm.org/D80953

Added: 
    llvm/test/CodeGen/PowerPC/aix-bytestring.ll

Modified: 
    llvm/include/llvm/MC/MCAsmInfo.h
    llvm/lib/MC/MCAsmInfoXCOFF.cpp
    llvm/lib/MC/MCAsmStreamer.cpp
    llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll
    llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll
    llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h
index 0f9d503045d8..2b889d0ed5fa 100644
--- a/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/llvm/include/llvm/MC/MCAsmInfo.h
@@ -54,6 +54,15 @@ enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment };
 /// This class is intended to be used as a base class for asm
 /// properties and features specific to the target.
 class MCAsmInfo {
+public:
+  /// Assembly character literal syntax types.
+  enum AsmCharLiteralSyntax {
+    ACLS_Unknown, /// Unknown; character literals not used by LLVM for this
+                  /// target.
+    ACLS_SingleQuotePrefix, /// The desired character is prefixed by a single
+                            /// quote, e.g., `'A`.
+  };
+
 protected:
   //===------------------------------------------------------------------===//
   // Properties to be set by the target writer, used to configure asm printer.
@@ -200,6 +209,16 @@ class MCAsmInfo {
   /// doesn't support this, it can be set to null.  Defaults to "\t.asciz\t"
   const char *AscizDirective;
 
+  /// This directive accepts a comma-separated list of bytes for emission as a
+  /// string of bytes.  For targets that do not support this, it shall be set to
+  /// null.  Defaults to null.
+  const char *ByteListDirective = nullptr;
+
+  /// Form used for character literals in the assembly syntax.  Useful for
+  /// producing strings as byte lists.  If a target does not use or support
+  /// this, it shall be set to ACLS_Unknown.  Defaults to ACLS_Unknown.
+  AsmCharLiteralSyntax CharacterLiteralSyntax = ACLS_Unknown;
+
   /// These directives are used to output some unit of integer data to the
   /// current section.  If a data directive is set to null, smaller data
   /// directives will be used to emit the large sizes.  Defaults to "\t.byte\t",
@@ -562,6 +581,10 @@ class MCAsmInfo {
   }
   const char *getAsciiDirective() const { return AsciiDirective; }
   const char *getAscizDirective() const { return AscizDirective; }
+  const char *getByteListDirective() const { return ByteListDirective; }
+  AsmCharLiteralSyntax characterLiteralSyntax() const {
+    return CharacterLiteralSyntax;
+  }
   bool getAlignmentIsInBytes() const { return AlignmentIsInBytes; }
   unsigned getTextAlignFillValue() const { return TextAlignFillValue; }
   const char *getGlobalDirective() const { return GlobalDirective; }

diff  --git a/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/llvm/lib/MC/MCAsmInfoXCOFF.cpp
index b5c5bb3ace8e..04982af4af31 100644
--- a/llvm/lib/MC/MCAsmInfoXCOFF.cpp
+++ b/llvm/lib/MC/MCAsmInfoXCOFF.cpp
@@ -24,6 +24,8 @@ MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
   ZeroDirectiveSupportsNonZeroValue = false;
   AsciiDirective = nullptr; // not supported
   AscizDirective = nullptr; // not supported
+  ByteListDirective = "\t.byte\t";
+  CharacterLiteralSyntax = ACLS_SingleQuotePrefix;
 
   // Use .vbyte for data definition to avoid directives that apply an implicit
   // alignment.

diff  --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 647197d8de4d..8d96935b2205 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -971,6 +971,47 @@ void MCAsmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
 
 static inline char toOctal(int X) { return (X&7)+'0'; }
 
+static void PrintByteList(StringRef Data, raw_ostream &OS,
+                          MCAsmInfo::AsmCharLiteralSyntax ACLS) {
+  assert(!Data.empty() && "Cannot generate an empty list.");
+  const auto printCharacterInOctal = [&OS](unsigned char C) {
+    OS << '0';
+    OS << toOctal(C >> 6);
+    OS << toOctal(C >> 3);
+    OS << toOctal(C >> 0);
+  };
+  const auto printOneCharacterFor = [printCharacterInOctal](
+                                        auto printOnePrintingCharacter) {
+    return [printCharacterInOctal, printOnePrintingCharacter](unsigned char C) {
+      if (isPrint(C)) {
+        printOnePrintingCharacter(static_cast<char>(C));
+        return;
+      }
+      printCharacterInOctal(C);
+    };
+  };
+  const auto printCharacterList = [Data, &OS](const auto &printOneCharacter) {
+    const auto BeginPtr = Data.begin(), EndPtr = Data.end();
+    for (const unsigned char C : make_range(BeginPtr, EndPtr - 1)) {
+      printOneCharacter(C);
+      OS << ',';
+    }
+    printOneCharacter(*(EndPtr - 1));
+  };
+  switch (ACLS) {
+  case MCAsmInfo::ACLS_Unknown:
+    printCharacterList(printCharacterInOctal);
+    return;
+  case MCAsmInfo::ACLS_SingleQuotePrefix:
+    printCharacterList(printOneCharacterFor([&OS](char C) {
+      const char AsmCharLitBuf[2] = {'\'', C};
+      OS << StringRef(AsmCharLitBuf, sizeof(AsmCharLitBuf));
+    }));
+    return;
+  }
+  llvm_unreachable("Invalid AsmCharLiteralSyntax value!");
+}
+
 static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
   OS << '"';
 
@@ -1009,33 +1050,42 @@ void MCAsmStreamer::emitBytes(StringRef Data) {
          "Cannot emit contents before setting section!");
   if (Data.empty()) return;
 
-  // If only single byte is provided or no ascii or asciz directives is
-  // supported, emit as vector of 8bits data.
-  if (Data.size() == 1 ||
-      !(MAI->getAscizDirective() || MAI->getAsciiDirective())) {
-    if (MCTargetStreamer *TS = getTargetStreamer()) {
-      TS->emitRawBytes(Data);
+  const auto emitAsString = [this](StringRef Data) {
+    // If the data ends with 0 and the target supports .asciz, use it, otherwise
+    // use .ascii or a byte-list directive
+    if (MAI->getAscizDirective() && Data.back() == 0) {
+      OS << MAI->getAscizDirective();
+      Data = Data.substr(0, Data.size() - 1);
+    } else if (LLVM_LIKELY(MAI->getAsciiDirective())) {
+      OS << MAI->getAsciiDirective();
+    } else if (MAI->getByteListDirective()) {
+      OS << MAI->getByteListDirective();
+      PrintByteList(Data, OS, MAI->characterLiteralSyntax());
+      EmitEOL();
+      return true;
     } else {
-      const char *Directive = MAI->getData8bitsDirective();
-      for (const unsigned char C : Data.bytes()) {
-        OS << Directive << (unsigned)C;
-        EmitEOL();
-      }
+      return false;
     }
+
+    PrintQuotedString(Data, OS);
+    EmitEOL();
+    return true;
+  };
+
+  if (Data.size() != 1 && emitAsString(Data))
     return;
-  }
 
-  // If the data ends with 0 and the target supports .asciz, use it, otherwise
-  // use .ascii
-  if (MAI->getAscizDirective() && Data.back() == 0) {
-    OS << MAI->getAscizDirective();
-    Data = Data.substr(0, Data.size()-1);
-  } else {
-    OS << MAI->getAsciiDirective();
+  // Only single byte is provided or no ascii, asciz, or byte-list directives
+  // are applicable. Emit as vector of individual 8bits data elements.
+  if (MCTargetStreamer *TS = getTargetStreamer()) {
+    TS->emitRawBytes(Data);
+    return;
+  }
+  const char *Directive = MAI->getData8bitsDirective();
+  for (const unsigned char C : Data.bytes()) {
+    OS << Directive << (unsigned)C;
+    EmitEOL();
   }
-
-  PrintQuotedString(Data, OS);
-  EmitEOL();
 }
 
 void MCAsmStreamer::emitBinaryData(StringRef Data) {

diff  --git a/llvm/test/CodeGen/PowerPC/aix-bytestring.ll b/llvm/test/CodeGen/PowerPC/aix-bytestring.ll
new file mode 100644
index 000000000000..443c019c9e30
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-bytestring.ll
@@ -0,0 +1,7 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s
+
+@str = constant [256 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10\11\12\13\14\15\16\17\18\19\1A\1B\1C\1D\1E\1F !\22#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\7F\80\81\82\83\84\85\86\87\88\89\8A\8B\8C\8D\8E\8F\90\91\92\93\94\95\96\97\98\99\9A\9B\9C\9D\9E\9F\A0\A1\A2\A3\A4\A5\A6\A7\A8\A9\AA\AB\AC\AD\AE\AF\B0\B1\B2\B3\B4\B5\B6\B7\B8\B9\BA\BB\BC\BD\BE\BF\C0\C1\C2\C3\C4\C5\C6\C7\C8\C9\CA\CB\CC\CD\CE\CF\D0\D1\D2\D3\D4\D5\D6\D7\D8\D9\DA\DB\DC\DD\DE\DF\E0\E1\E2\E3\E4\E5\E6\E7\E8\E9\EA\EB\EC\ED\EE\EF\F0\F1\F2\F3\F4\F5\F6\F7\F8\F9\FA\FB\FC\FD\FE\FF\00", align 1
+
+; CHECK-LABEL:str:
+;  CHECK-NEXT:        .byte   0001,0002,0003,0004,0005,0006,0007,0010,0011,0012,0013,0014,0015,0016,0017,0020,0021,0022,0023,0024,0025,0026,0027,0030,0031,0032,0033,0034,0035,0036,0037,' ,'!,'",'#,'$,'%,'&,'','(,'),'*,'+,',,'-,'.,'/,'0,'1,'2,'3,'4,'5,'6,'7,'8,'9,':,';,'<,'=,'>,'?,'@,'A,'B,'C,'D,'E,'F,'G,'H,'I,'J,'K,'L,'M,'N,'O,'P,'Q,'R,'S,'T,'U,'V,'W,'X,'Y,'Z,'[,'\,'],'^,'_,'`,'a,'b,'c,'d,'e,'f,'g,'h,'i,'j,'k,'l,'m,'n,'o,'p,'q,'r,'s,'t,'u,'v,'w,'x,'y,'z,'{,'|,'},'~,0177,0200,0201,0202,0203,0204,0205,0206,0207,0210,0211,0212,0213,0214,0215,0216,0217,0220,0221,0222,0223,0224,0225,0226,0227,0230,0231,0232,0233,0234,0235,0236,0237,0240,0241,0242,0243,0244,0245,0246,0247,0250,0251,0252,0253,0254,0255,0256,0257,0260,0261,0262,0263,0264,0265,0266,0267,0270,0271,0272,0273,0274,0275,0276,0277,0300,0301,0302,0303,0304,0305,0306,0307,0310,0311,0312,0313,0314,0315,0316,0317,0320,0321,0322,0323,0324,0325,0326,0327,0330,0331,0332,0333,0334,0335,0336,0337,0340,0341,0342,0343,0344,0345,0346,0347,0350,0351,0352,0353,0354,0355,0356,0357,0360,0361,0362,0363,0364,0365,0366,0367,0370,0371,0372,0373,0374,0375,0376,0377,0000

diff  --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll
index 4083bd58fe98..88c8b08bdb59 100644
--- a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll
@@ -86,10 +86,7 @@
 
 ; CHECK:      .globl  chrarray
 ; CHECK-NEXT: chrarray:
-; CHECK-NEXT: .byte   97
-; CHECK-NEXT: .byte   98
-; CHECK-NEXT: .byte   99
-; CHECK-NEXT: .byte   100
+; CHECK-NEXT: .byte   'a,'b,'c,'d
 
 ; CHECK:      .globl  dblarr
 ; CHECK-NEXT: .align  3

diff  --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll
index 42ead4b9b4de..0d29857fd155 100644
--- a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll
@@ -41,30 +41,9 @@ entry:
 ; CHECK-NEXT:   .vbyte	4, 0                       # 0x0
 ; CHECK-NEXT:   .csect .rodata.str1.1[RO],2
 ; CHECK-NEXT: L..strA:
-; CHECK-NEXT: .byte   104
-; CHECK-NEXT: .byte   101
-; CHECK-NEXT: .byte   108
-; CHECK-NEXT: .byte   108
-; CHECK-NEXT: .byte   111
-; CHECK-NEXT: .byte   32
-; CHECK-NEXT: .byte   119
-; CHECK-NEXT: .byte   111
-; CHECK-NEXT: .byte   114
-; CHECK-NEXT: .byte   108
-; CHECK-NEXT: .byte   100
-; CHECK-NEXT: .byte   33
-; CHECK-NEXT: .byte   10
-; CHECK-NEXT: .byte   0
+; CHECK-NEXT: .byte   'h,'e,'l,'l,'o,' ,'w,'o,'r,'l,'d,'!,0012,0000
 ; CHECK-NEXT: L...str:
-; CHECK-NEXT: .byte   97
-; CHECK-NEXT: .byte   98
-; CHECK-NEXT: .byte   99
-; CHECK-NEXT: .byte   100
-; CHECK-NEXT: .byte   101
-; CHECK-NEXT: .byte   102
-; CHECK-NEXT: .byte   103
-; CHECK-NEXT: .byte   104
-; CHECK-NEXT: .byte   0
+; CHECK-NEXT: .byte   'a,'b,'c,'d,'e,'f,'g,'h,0000
 
 ; CHECKOBJ:     00000010 <.rodata.str2.2>:
 ; CHECKOBJ-NEXT:       10: 01 08 01 10

diff  --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll
index dddbe2ba089e..a7bb01896642 100644
--- a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll
@@ -53,10 +53,7 @@
 ; CHECK64-NEXT:        .vbyte	8, 0x408c200000000000
 ; CHECK-NEXT:          .globl  const_chrarray
 ; CHECK-NEXT:  const_chrarray:
-; CHECK-NEXT:          .byte   97
-; CHECK-NEXT:          .byte   98
-; CHECK-NEXT:          .byte   99
-; CHECK-NEXT:          .byte   100
+; CHECK-NEXT:          .byte   'a,'b,'c,'d
 ; CHECK-NEXT:          .globl  const_dblarr
 ; CHECK-NEXT:          .align  3
 ; CHECK-NEXT:  const_dblarr:


        


More information about the llvm-commits mailing list