[clang] de0e311 - [SystemZ] Improve handling of vector alignments.

Thu Sep 8 08:34:44 PDT 2022

Author: Jonas Paulsson
Date: 2022-09-08T17:33:05+02:00
New Revision: de0e3117d40af95993b11d0622f9700415552d48

URL: https://github.com/llvm/llvm-project/commit/de0e3117d40af95993b11d0622f9700415552d48
DIFF: https://github.com/llvm/llvm-project/commit/de0e3117d40af95993b11d0622f9700415552d48.diff

LOG: [SystemZ] Improve handling of vector alignments.

Make the DataLayout string always hold a vector alignment of 8 bytes,
regardless of the vector ABI. This makes the datalayout depend only on the
target triple which is the general expectation (in assertions).

On older architectures where vectors use the natural alignment (16 bytes),
the front end will maintain the same behavior and produce an overalignment
compared to the datalayout.

Reviewed By: uweigand

Differential Revision: https://reviews.llvm.org/D131158

Added: 
    clang/test/CodeGen/SystemZ/align-systemz-02.c

Modified: 
    clang/lib/Basic/Targets/SystemZ.h
    clang/test/CodeGen/target-data.c
    llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
    llvm/test/CodeGen/SystemZ/function-attributes-01.ll
    llvm/test/CodeGen/SystemZ/vec-abi-align.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index 7def024d07a7..371eb2516378 100644

--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -51,13 +51,13 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
       // All vector types are default aligned on an 8-byte boundary, even if the
       // vector facility is not available. That is 
diff erent from Linux.
       MaxVectorAlign = 64;
-      // Compared to Linux/ELF, the data layout 
diff ers only in some details:
-      // - name mangling is GOFF
-      // - 128 bit vector types are 64 bit aligned
+      // Compared to Linux/ELF, the data layout 
diff ers only in that name
+      // mangling is GOFF.
       resetDataLayout(
           "E-m:l-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64");
     } else
-      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64");
+      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
+                      "-v128:64-a:8:16-n32:64");
     MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
     HasStrictFP = true;
   }
@@ -171,12 +171,14 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
     }
     HasVector &= !SoftFloat;
 
-    // If we use the vector ABI, vector types are 64-bit aligned.
-    if (HasVector && !getTriple().isOSzOS()) {
+    // If we use the vector ABI, vector types are 64-bit aligned. The
+    // DataLayout string is always set to this alignment as it is not a
+    // requirement that it follows the alignment emitted by the front end. It
+    // is assumed generally that the Datalayout should reflect only the
+    // target triple and not any specific feature.
+    if (HasVector && !getTriple().isOSzOS())
       MaxVectorAlign = 64;
-      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
-                      "-v128:64-a:8:16-n32:64");
-    }
+
     return true;
   }
 

diff  --git a/clang/test/CodeGen/SystemZ/align-systemz-02.c b/clang/test/CodeGen/SystemZ/align-systemz-02.c
new file mode 100644
index 000000000000..cc09a9093741
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/align-systemz-02.c
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature +vector -emit-llvm \
+// RUN:    | FileCheck %s -check-prefix=VECIR
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature +vector -emit-obj -S \
+// RUN:    | FileCheck %s -check-prefix=VECASM
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature -vector -emit-llvm \
+// RUN:    | FileCheck %s -check-prefix=SCALIR
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature -vector -emit-obj -S \
+// RUN:    | FileCheck %s -check-prefix=SCALASM
+
+typedef __attribute__((vector_size(16))) signed int vec_sint;
+
+volatile vec_sint GlobVsi;
+
+struct S {
+  int A;
+  vec_sint Vsi;
+} GlobS;
+
+void fun() {
+  GlobS.Vsi = GlobVsi;
+}
+
+// VECIR: %struct.S = type { i32, <4 x i32> }
+// VECIR: @GlobVsi = global <4 x i32> zeroinitializer, align 8
+// VECIR: @GlobS = global %struct.S zeroinitializer, align 8
+// VECIR: %0 = load volatile <4 x i32>, ptr @GlobVsi, align 8
+// VECIR: store <4 x i32> %0, ptr getelementptr inbounds (%struct.S, ptr @GlobS, i32 0, i32 1), align 8
+
+// VECASM:      lgrl %r1, GlobVsi at GOT
+// VECASM-NEXT: vl   %v0, 0(%r1), 3
+// VECASM-NEXT: lgrl %r1, GlobS at GOT
+// VECASM-NEXT: vst  %v0, 8(%r1), 3
+//
+// VECASM:   .globl  GlobVsi
+// VECASM:   .p2align        3
+// VECASM: GlobVsi:
+// VECASM:   .space  16
+// VECASM:   .globl  GlobS
+// VECASM:   .p2align        3
+// VECASM: GlobS:
+// VECASM:   .space  24
+
+// SCALIR: %struct.S = type { i32, [12 x i8], <4 x i32> }
+// SCALIR: @GlobVsi = global <4 x i32> zeroinitializer, align 16
+// SCALIR: @GlobS = global %struct.S zeroinitializer, align 16
+// SCALIR: %0 = load volatile <4 x i32>, ptr @GlobVsi, align 16
+// SCALIR: store <4 x i32> %0, ptr getelementptr inbounds (%struct.S, ptr @GlobS, i32 0, i32 2), align 16
+
+// SCALASM:      lgrl    %r1, GlobVsi at GOT
+// SCALASM-NEXT: l       %r0, 0(%r1)
+// SCALASM-NEXT: l       %r2, 4(%r1)
+// SCALASM-NEXT: l       %r3, 8(%r1)
+// SCALASM-NEXT: l       %r4, 12(%r1)
+// SCALASM-NEXT: lgrl    %r1, GlobS at GOT
+// SCALASM-NEXT: st      %r4, 28(%r1)
+// SCALASM-NEXT: st      %r3, 24(%r1)
+// SCALASM-NEXT: st      %r2, 20(%r1)
+// SCALASM-NEXT: st      %r0, 16(%r1)
+//
+// SCALASM:   .globl  GlobVsi
+// SCALASM:   .p2align        4
+// SCALASM: GlobVsi:
+// SCALASM:   .space  16
+// SCALASM:   .globl  GlobS
+// SCALASM:   .p2align        4
+// SCALASM: GlobS:
+// SCALASM:   .space  32
+

diff  --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index e4150837279c..e5ee17bc38a7 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -223,7 +223,7 @@
 // RUN: FileCheck %s -check-prefix=SYSTEMZ
 // RUN: %clang_cc1 -triple s390x-unknown -target-cpu z13 -target-feature +soft-float -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=SYSTEMZ
-// SYSTEMZ: target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
+// SYSTEMZ: target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
 
 // RUN: %clang_cc1 -triple s390x-unknown -target-cpu z13 -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=SYSTEMZ-VECTOR

diff  --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 31f8ee2f894d..8c1be7d4949d 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -42,37 +42,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() {
   initializeSystemZTDCPassPass(PR);
 }
 
-// Determine whether we use the vector ABI.
-static bool UsesVectorABI(StringRef CPU, StringRef FS) {
-  // We use the vector ABI whenever the vector facility is avaiable.
-  // This is the case by default if CPU is z13 or later, and can be
-  // overridden via "[+-]vector" feature string elements.
-  bool VectorABI = true;
-  bool SoftFloat = false;
-  if (CPU.empty() || CPU == "generic" ||
-      CPU == "z10" || CPU == "z196" || CPU == "zEC12" ||
-      CPU == "arch8" || CPU == "arch9" || CPU == "arch10")
-    VectorABI = false;
-
-  SmallVector<StringRef, 3> Features;
-  FS.split(Features, ',', -1, false /* KeepEmpty */);
-  for (auto &Feature : Features) {
-    if (Feature == "vector" || Feature == "+vector")
-      VectorABI = true;
-    if (Feature == "-vector")
-      VectorABI = false;
-    if (Feature == "soft-float" || Feature == "+soft-float")
-      SoftFloat = true;
-    if (Feature == "-soft-float")
-      SoftFloat = false;
-  }
-
-  return VectorABI && !SoftFloat;
-}
-
-static std::string computeDataLayout(const Triple &TT, StringRef CPU,
-                                     StringRef FS) {
-  bool VectorABI = UsesVectorABI(CPU, FS);
+static std::string computeDataLayout(const Triple &TT) {
   std::string Ret;
 
   // Big endian.
@@ -92,10 +62,9 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
   // 128-bit floats are aligned only to 64 bits.
   Ret += "-f128:64";
 
-  // When using the vector ABI on Linux, 128-bit vectors are also aligned to 64
-  // bits. On z/OS, vector types are always aligned to 64 bits.
-  if (VectorABI || TT.isOSzOS())
-    Ret += "-v128:64";
+  // The DataLayout string always holds a vector alignment of 64 bits, see
+  // comment in clang/lib/Basic/Targets/SystemZ.h.
+  Ret += "-v128:64";
 
   // We prefer 16 bits of aligned for all globals; see above.
   Ret += "-a:8:16";
@@ -174,7 +143,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
                                            Optional<CodeModel::Model> CM,
                                            CodeGenOpt::Level OL, bool JIT)
     : LLVMTargetMachine(
-          T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
+          T, computeDataLayout(TT), TT, CPU, FS, Options,
           getEffectiveRelocModel(RM),
           getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),
           OL),

diff  --git a/llvm/test/CodeGen/SystemZ/function-attributes-01.ll b/llvm/test/CodeGen/SystemZ/function-attributes-01.ll
index e8f9d359722f..99d96a6cd05e 100644
--- a/llvm/test/CodeGen/SystemZ/function-attributes-01.ll
+++ b/llvm/test/CodeGen/SystemZ/function-attributes-01.ll
@@ -38,11 +38,11 @@ entry:
 attributes #3 = { "target-cpu"="z14" "target-features"="+vector" "use-soft-float"="false" }
 define <2 x double> @fun3(<2 x double>* %A) #3 {
 ; CHECK-LABEL:     fun3:
-; DEFAULT:         vl %v24, 0(%r2), 4
+; DEFAULT:         vl %v24, 0(%r2), 3
 ; SOFT-FLOAT:      lg %r0, 0(%r2)
 ; SOFT-FLOAT-NEXT: lg %r3, 8(%r2)
 ; SOFT-FLOAT-NEXT: lgr %r2, %r0
-; NO-SOFT-FL:      vl %v24, 0(%r2), 4
+; NO-SOFT-FL:      vl %v24, 0(%r2), 3
 ; NO-VECTOR:       ld %f0, 0(%r2)
 ; NO-VECTOR-NEXT:  ld %f2, 8(%r2)
 ; CHECK-NEXT:      br %r14
@@ -111,11 +111,11 @@ entry:
 attributes #7 = { "target-cpu"="zEC12" "target-features"="+vector" "use-soft-float"="false" }
 define <2 x double> @fun7(<2 x double>* %A) #7 {
 ; CHECK-LABEL:     fun7:
-; DEFAULT:         vl %v24, 0(%r2), 4
+; DEFAULT:         vl %v24, 0(%r2), 3
 ; SOFT-FLOAT:      lg %r0, 0(%r2)
 ; SOFT-FLOAT-NEXT: lg %r3, 8(%r2)
 ; SOFT-FLOAT-NEXT: lgr %r2, %r0
-; NO-SOFT-FL:      vl %v24, 0(%r2), 4
+; NO-SOFT-FL:      vl %v24, 0(%r2), 3
 ; NO-VECTOR:       ld %f0, 0(%r2)
 ; NO-VECTOR-NEXT:  ld %f2, 8(%r2)
 ; CHECK-NEXT:      br %r14

diff  --git a/llvm/test/CodeGen/SystemZ/vec-abi-align.ll b/llvm/test/CodeGen/SystemZ/vec-abi-align.ll
index c8ccae2561ab..5a33197b62a5 100644
--- a/llvm/test/CodeGen/SystemZ/vec-abi-align.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-abi-align.ll
@@ -1,55 +1,73 @@
-; Verify that we use the vector ABI datalayout if and only if
-; the vector facility is present.
+; Verify that a struct as generated by the frontend is correctly accessed in
+; both cases of enabling/disabling the vector facility.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=generic | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,+vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector,-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector,-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=+soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
 ; RUN:   -mattr=soft-float,-soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
 ; RUN:   -mattr=-soft-float,soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 
-%struct.S = type { i8, <2 x i64> }
+%struct.S_vx = type { i8, <2 x i64> }
+%struct.S_novx = type { i8, [15 x i8], <2 x i64> }
 
-define void @test(%struct.S* %s) nounwind {
-; CHECK-VECTOR-LABEL: @test
+define void @fun_vx(%struct.S_vx* %s) nounwind {
+; CHECK-LABEL: @fun_vx
+;
 ; CHECK-VECTOR: vl %v0, 8(%r2)
-; CHECK-NOVECTOR-LABEL: @test
+; CHECK-VECTOR: vst %v0, 8(%r2), 3
+;
+; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
+; CHECK-NOVECTOR-DAG: agsi 8(%r2), 1
+  %ptr = getelementptr %struct.S_vx, %struct.S_vx* %s, i64 0, i32 1
+  %vec = load <2 x i64>, <2 x i64>* %ptr
+  %add = add <2 x i64> %vec, <i64 1, i64 1>
+  store <2 x i64> %add, <2 x i64>* %ptr
+  ret void
+}
+
+define void @fun_novx(%struct.S_novx* %s) nounwind {
+; CHECK-LABEL: @fun_novx
+;
+; CHECK-VECTOR: vl  %v0, 16(%r2), 3
+; CHECK-VECTOR: vst %v0, 16(%r2), 3
+;
 ; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
 ; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1
-  %ptr = getelementptr %struct.S, %struct.S* %s, i64 0, i32 1
+  %ptr = getelementptr %struct.S_novx, %struct.S_novx* %s, i64 0, i32 2
   %vec = load <2 x i64>, <2 x i64>* %ptr
   %add = add <2 x i64> %vec, <i64 1, i64 1>
   store <2 x i64> %add, <2 x i64>* %ptr