[llvm] 1e45136 - Revert "[NVPTX] Use the mask() operator to initialize packed structs with pointers"

Mon Jul 18 09:09:06 PDT 2022

Author: Igor Kudrin
Date: 2022-07-18T20:08:39+04:00
New Revision: 1e451369d2017830d3dbddec24063170b7aca0de

URL: https://github.com/llvm/llvm-project/commit/1e451369d2017830d3dbddec24063170b7aca0de
DIFF: https://github.com/llvm/llvm-project/commit/1e451369d2017830d3dbddec24063170b7aca0de.diff

LOG: Revert "[NVPTX] Use the mask() operator to initialize packed structs with pointers"

The new test fails on BE hosts.

This reverts commit 04e978ccba1e6c8b600b2fbad1a82b4b64ffc34b.

Added: 
    

Modified: 
    llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
    llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
    llvm/lib/Target/NVPTX/NVPTXSubtarget.h

Removed: 
    llvm/test/CodeGen/NVPTX/packed-aggr.ll


################################################################################
diff  --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 4a2d9c40d4240..422e47b3d97d7 100644

--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -75,7 +75,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/NativeFormatting.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
@@ -1173,26 +1172,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
           bufferAggregateConstant(Initializer, &aggBuffer);
           if (aggBuffer.numSymbols()) {
             unsigned int ptrSize = MAI->getCodePointerSize();
-            if (ElementSize % ptrSize ||
-                !aggBuffer.allSymbolsAligned(ptrSize)) {
-              // Print in bytes and use the mask() operator for pointers.
-              if (!STI.hasMaskOperator())
-                report_fatal_error(
-                    "initialized packed aggregate with pointers '" +
-                    GVar->getName() +
-                    "' requires at least PTX ISA version 7.1");
-              O << " .u8 ";
-              getSymbol(GVar)->print(O, MAI);
-              O << "[" << ElementSize << "] = {";
-              aggBuffer.printBytes(O);
-              O << "}";
-            } else {
-              O << " .u" << ptrSize * 8 << " ";
-              getSymbol(GVar)->print(O, MAI);
-              O << "[" << ElementSize / ptrSize << "] = {";
-              aggBuffer.printWords(O);
-              O << "}";
-            }
+            O << " .u" << ptrSize * 8 << " ";
+            getSymbol(GVar)->print(O, MAI);
+            O << "[" << ElementSize / ptrSize << "] = {";
+            aggBuffer.printWords(O);
+            O << "}";
           } else {
             O << " .b8 ";
             getSymbol(GVar)->print(O, MAI);
@@ -1249,33 +1233,10 @@ void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
 }
 
 void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
-  unsigned int ptrSize = AP.MAI->getCodePointerSize();
-  symbolPosInBuffer.push_back(size);
-  unsigned int nSym = 0;
-  unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
-  for (unsigned int pos = 0; pos < size;) {
+  for (unsigned int pos = 0; pos < size; ++pos) {
     if (pos)
       os << ", ";
-    if (pos != nextSymbolPos) {
-      os << (unsigned int)buffer[pos];
-      ++pos;
-      continue;
-    }
-    // Generate a per-byte mask() operator for the symbol, which looks like:
-    //   .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
-    // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
-    std::string symText;
-    llvm::raw_string_ostream oss(symText);
-    printSymbol(nSym, oss);
-    for (unsigned i = 0; i < ptrSize; ++i) {
-      if (i)
-        os << ", ";
-      llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
-      os << "(" << symText << ")";
-    }
-    pos += ptrSize;
-    nextSymbolPos = symbolPosInBuffer[++nSym];
-    assert(nextSymbolPos >= pos);
+    os << (unsigned int)buffer[pos];
   }
 }
 

diff  --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 710c089e33251..ebf9e29b5a627 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -61,31 +61,26 @@ class MCOperand;
 class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
 
   class AggBuffer {
-    // Used to buffer the emitted string for initializing global aggregates.
+    // Used to buffer the emitted string for initializing global
+    // aggregates.
     //
-    // Normally an aggregate (array, vector, or structure) is emitted as a u8[].
-    // However, if either element/field of the aggregate is a non-NULL address,
-    // and all such addresses are properly aligned, then the aggregate is
-    // emitted as u32[] or u64[]. In the case of unaligned addresses, the
-    // aggregate is emitted as u8[], and the mask() operator is used for all
-    // pointers.
+    // Normally an aggregate (array, vector or structure) is emitted
+    // as a u8[]. However, if one element/field of the aggregate
+    // is a non-NULL address, then the aggregate is emitted as u32[]
+    // or u64[].
     //
-    // We first layout the aggregate in 'buffer' in bytes, except for those
-    // symbol addresses. For the i-th symbol address in the aggregate, its
-    // corresponding 4-byte or 8-byte elements in 'buffer' are filled with 0s.
-    // symbolPosInBuffer[i-1] records its position in 'buffer', and Symbols[i-1]
-    // records the Value*.
+    // We first layout the aggregate in 'buffer' in bytes, except for
+    // those symbol addresses. For the i-th symbol address in the
+    //aggregate, its corresponding 4-byte or 8-byte elements in 'buffer'
+    // are filled with 0s. symbolPosInBuffer[i-1] records its position
+    // in 'buffer', and Symbols[i-1] records the Value*.
     //
-    // Once we have this AggBuffer setup, we can choose how to print it out.
+    // Once we have this AggBuffer setup, we can choose how to print
+    // it out.
   public:
     // number of symbol addresses
     unsigned numSymbols() const { return Symbols.size(); }
 
-    bool allSymbolsAligned(unsigned ptrSize) const {
-      return llvm::all_of(symbolPosInBuffer,
-                          [=](unsigned pos) { return pos % ptrSize == 0; });
-    }
-
   private:
     const unsigned size;   // size of the buffer in bytes
     std::vector<unsigned char> buffer; // the buffer

diff  --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index cea3dce3f1c55..9a249d3da3d56 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -77,7 +77,6 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasImageHandles() const;
   bool hasFP16Math() const { return SmVersion >= 53; }
   bool allowFP16Math() const;
-  bool hasMaskOperator() const { return PTXVersion >= 71; }
   unsigned int getSmVersion() const { return SmVersion; }
   std::string getTargetName() const { return TargetName; }
 

diff  --git a/llvm/test/CodeGen/NVPTX/packed-aggr.ll b/llvm/test/CodeGen/NVPTX/packed-aggr.ll
deleted file mode 100644
index dba888a87e483..0000000000000
--- a/llvm/test/CodeGen/NVPTX/packed-aggr.ll
+++ /dev/null
@@ -1,95 +0,0 @@
-; RUN: not --crash llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx70 2>&1 | \
-; RUN:   FileCheck %s --check-prefix=ERROR
-
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx71 | \
-; RUN:   FileCheck %s --check-prefixes=CHECK,CHECK32
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | \
-; RUN:   FileCheck %s --check-prefixes=CHECK,CHECK64
-; RUN: %if ptxas-11.1 %{ llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %}
-; RUN: %if ptxas-11.1 %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %}
-
-;; Test that packed structs with symbol references are represented using the
-;; mask() operator.
-
-declare void @func()
- at p = addrspace(1) global i8 0
-; CHECK: .extern .func func
-; CHECK: .u8 p;
-
-%t1 = type <{ i16, i8*, i8, void ()*, i8*, i32 }>
- at s1 = addrspace(1) global %t1 <{
-; ERROR: initialized packed aggregate with pointers 's1' requires at least PTX ISA version 7.1
-; CHECK32: .global .align 1 .u8 s1[19] = {
-; CHECK64: .global .align 1 .u8 s1[31] = {
-    i16 12,
-; CHECK-SAME:   12, 0,
-    i8* addrspacecast (i8 addrspace(1)* @p to i8*),
-; CHECK-SAME:   0xFF(generic(p)), 0xFF00(generic(p)), 0xFF0000(generic(p)), 0xFF000000(generic(p)),
-; CHECK64-SAME: 0xFF00000000(generic(p)), 0xFF0000000000(generic(p)), 0xFF000000000000(generic(p)), 0xFF00000000000000(generic(p)),
-    i8 34,
-; CHECK-SAME:   34
-    void ()* @func,
-; CHECK-SAME:   0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
-; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
-    i8* addrspacecast (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* @p, i32 3) to i8*),
-; CHECK-SAME:   0xFF(generic(p)+3), 0xFF00(generic(p)+3), 0xFF0000(generic(p)+3), 0xFF000000(generic(p)+3),
-; CHECK64-SAME: 0xFF00000000(generic(p)+3), 0xFF0000000000(generic(p)+3), 0xFF000000000000(generic(p)+3), 0xFF00000000000000(generic(p)+3),
-    i32 56 }>, align 1
-; CHECK-SAME:   56, 0, 0, 0};
-
-;; Test a case than an unaligned pointer is in a nested struct.
-
-%t2i = type <{ void ()* }>
-%t2o = type { i8, %t2i, i32 }
- at s2 = addrspace(1) global %t2o {
-; CHECK32: .global .align 8 .u8 s2[12] = {
-; CHECK64: .global .align 8 .u8 s2[16] = {
-    i8 12,
-; CHECK-SAME:   12,
-    %t2i <{ void()* @func }>,
-; CHECK-SAME:   0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
-; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
-    i32 34}
-; CHECK-SAME:   0, 0, 0,
-; CHECK-SAME:   34, 0, 0, 0};
-
-;; Test that a packed struct which size is not multiple of the pointer size
-;; is printed in bytes and uses the mask() operator for pointers even though
-;; the pointers are aligned.
-
-%t3 = type <{ void ()*, i8 }>
- at s3 = addrspace(1) global %t3 <{
-; CHECK32: .global .align 1 .u8 s3[5] = {
-; CHECK64: .global .align 1 .u8 s3[9] = {
-    void ()* @func,
-; CHECK-SAME:   0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
-; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
-    i8 56 }>, align 1
-; CHECK-SAME:   56};
-
-;; Test that a packed struct with aligned pointers is printed in words.
-
-%t4 = type <{ void ()*, i64 }>
- at s4 = addrspace(1) global %t4 <{
-; CHECK32: .global .align 1 .u32 s4[3] = {
-; CHECK64: .global .align 1 .u64 s4[2] = {
-    void()* @func,
-; CHECK-SAME:   func,
-    i64 15}>, align 1
-; CHECK32-SAME: 15, 0};
-; CHECK64-SAME: 15};
-
-;; Test that a packed struct with unaligned pointers inside an array is handled.
-
-%t5 = type <{ void ()*, i16 }>
- at a5 = addrspace(1) global [2 x %t5] [%t5 <{ void()* @func, i16 5 }>, %t5 <{ void()* @func, i16 9 }> ]
-; CHECK32: .global .align 8 .u8 a5[12] = {
-; CHECK32-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), 5, 0,
-; CHECK32-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), 9, 0};
-; CHECK64: .global .align 8 .u8 a5[20] = {
-; CHECK64-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
-; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
-; CHECK64-SAME: 5, 0,
-; CHECK64-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
-; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
-; CHECK64-SAME: 9, 0};