[clang] 8737c74 - [PowerPC][MMA] Allow MMA builtin types in pre-P10 compilation units

Kamau Bridgeman via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 5 05:59:39 PDT 2021


Author: Kamau Bridgeman
Date: 2021-10-05T07:59:32-05:00
New Revision: 8737c74fab3aee833d85b7d235d2c47ebb4eed2e

URL: https://github.com/llvm/llvm-project/commit/8737c74fab3aee833d85b7d235d2c47ebb4eed2e
DIFF: https://github.com/llvm/llvm-project/commit/8737c74fab3aee833d85b7d235d2c47ebb4eed2e.diff

LOG: [PowerPC][MMA] Allow MMA builtin types in pre-P10 compilation units

This patch allows the use of __vector_quad and __vector_pair, PPC MMA builtin
types, on all PowerPC 64-bit compilation units. When these types are
made available, the builtins that use them automatically become available,
so semantic checking for mma and pair vector memop __builtins is also
expanded to ensure these builtin function calls are only allowed on
Power10 and newer architectures. All related test cases are updated to
ensure test coverage.

Reviewed By: #powerpc, nemanjai

Differential Revision: https://reviews.llvm.org/D109599

Added: 
    clang/test/Sema/ppc-mma-builtins.c
    clang/test/Sema/ppc-paired-vector-builtins.c

Modified: 
    clang/include/clang/Sema/Sema.h
    clang/lib/AST/ASTContext.cpp
    clang/lib/Sema/Sema.cpp
    clang/lib/Sema/SemaChecking.cpp
    clang/test/AST/ast-dump-ppc-types.c
    clang/test/CodeGen/ppc-mma-types.c
    clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp
    llvm/test/CodeGen/PowerPC/mma-acc-memops.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a85e53a9a69e8..0a68f6f71b8e7 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12705,7 +12705,8 @@ class Sema final {
                                 int ArgNum, unsigned ExpectedFieldNum,
                                 bool AllowName);
   bool SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall);
-  bool SemaBuiltinPPCMMACall(CallExpr *TheCall, const char *TypeDesc);
+  bool SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID,
+                             const char *TypeDesc);
 
   bool CheckPPCMMAType(QualType Type, SourceLocation TypeLoc);
 

diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index e2ebe737fdfdc..d1fd3ce061415 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1444,13 +1444,10 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
 #include "clang/Basic/AArch64SVEACLETypes.def"
   }
 
-  if (Target.getTriple().isPPC64() &&
-      Target.hasFeature("paired-vector-memops")) {
-    if (Target.hasFeature("mma")) {
+  if (Target.getTriple().isPPC64()) {
 #define PPC_VECTOR_MMA_TYPE(Name, Id, Size) \
       InitBuiltinType(Id##Ty, BuiltinType::Id);
 #include "clang/Basic/PPCTypes.def"
-    }
 #define PPC_VECTOR_VSX_TYPE(Name, Id, Size) \
     InitBuiltinType(Id##Ty, BuiltinType::Id);
 #include "clang/Basic/PPCTypes.def"

diff  --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index d260a45867e06..cf8dcbb6fc3ef 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -403,13 +403,10 @@ void Sema::Initialize() {
 #include "clang/Basic/AArch64SVEACLETypes.def"
   }
 
-  if (Context.getTargetInfo().getTriple().isPPC64() &&
-      Context.getTargetInfo().hasFeature("paired-vector-memops")) {
-    if (Context.getTargetInfo().hasFeature("mma")) {
+  if (Context.getTargetInfo().getTriple().isPPC64()) {
 #define PPC_VECTOR_MMA_TYPE(Name, Id, Size) \
       addImplicitTypedef(#Name, Context.Id##Ty);
 #include "clang/Basic/PPCTypes.def"
-    }
 #define PPC_VECTOR_VSX_TYPE(Name, Id, Size) \
     addImplicitTypedef(#Name, Context.Id##Ty);
 #include "clang/Basic/PPCTypes.def"

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a6d26ac65465d..0ee05c9f09a52 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3521,9 +3521,9 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
   case PPC::BI__builtin_ppc_store8r:
     return SemaFeatureCheck(*this, TheCall, "isa-v206-instructions",
                             diag::err_ppc_builtin_only_on_arch, "7");
-#define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
-  case PPC::BI__builtin_##Name: \
-    return SemaBuiltinPPCMMACall(TheCall, Types);
+#define CUSTOM_BUILTIN(Name, Intr, Types, Acc)                                 \
+  case PPC::BI__builtin_##Name:                                                \
+    return SemaBuiltinPPCMMACall(TheCall, BuiltinID, Types);
 #include "clang/Basic/BuiltinsPPC.def"
   }
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
@@ -7481,11 +7481,35 @@ bool Sema::SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
 /// Emit an error and return true on failure; return false on success.
 /// TypeStr is a string containing the type descriptor of the value returned by
 /// the builtin and the descriptors of the expected type of the arguments.
-bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, const char *TypeStr) {
+bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID,
+                                 const char *TypeStr) {
 
   assert((TypeStr[0] != '\0') &&
          "Invalid types in PPC MMA builtin declaration");
 
+  switch (BuiltinID) {
+  default:
+    // This function is called in CheckPPCBuiltinFunctionCall where the
+    // BuiltinID is guaranteed to be an MMA or pair vector memop builtin, here
+    // we are isolating the pair vector memop builtins that can be used with mma
+    // off so the default case is every builtin that requires mma and paired
+    // vector memops.
+    if (SemaFeatureCheck(*this, TheCall, "paired-vector-memops",
+                         diag::err_ppc_builtin_only_on_arch, "10") ||
+        SemaFeatureCheck(*this, TheCall, "mma",
+                         diag::err_ppc_builtin_only_on_arch, "10"))
+      return true;
+    break;
+  case PPC::BI__builtin_vsx_lxvp:
+  case PPC::BI__builtin_vsx_stxvp:
+  case PPC::BI__builtin_vsx_assemble_pair:
+  case PPC::BI__builtin_vsx_disassemble_pair:
+    if (SemaFeatureCheck(*this, TheCall, "paired-vector-memops",
+                         diag::err_ppc_builtin_only_on_arch, "10"))
+      return true;
+    break;
+  }
+
   unsigned Mask = 0;
   unsigned ArgNum = 0;
 

diff  --git a/clang/test/AST/ast-dump-ppc-types.c b/clang/test/AST/ast-dump-ppc-types.c
index 013f935376a6f..26ae5441f20d7 100644
--- a/clang/test/AST/ast-dump-ppc-types.c
+++ b/clang/test/AST/ast-dump-ppc-types.c
@@ -1,13 +1,9 @@
-// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
 // RUN:   -ast-dump -ast-dump-filter __vector %s | FileCheck %s
-// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
-// RUN:   -target-feature -mma -ast-dump %s | FileCheck %s \
-// RUN:   --check-prefix=CHECK-NO-MMA
-// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
-// RUN:   -target-feature -paired-vector-memops -ast-dump %s | FileCheck %s \
-// RUN:   --check-prefix=CHECK-NO-PAIRED
 // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \
-// RUN:   -ast-dump %s | FileCheck %s --check-prefix=CHECK-PWR9
+// RUN:   -ast-dump -ast-dump-filter __vector %s | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr8 \
+// RUN:   -ast-dump -ast-dump-filter __vector %s | FileCheck %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -ast-dump %s | FileCheck %s \
 // RUN:   --check-prefix=CHECK-X86_64
 // RUN: %clang_cc1 -triple arm-unknown-unknown -ast-dump %s | FileCheck %s \
@@ -24,15 +20,6 @@
 // CHECK: TypedefDecl {{.*}} implicit __vector_pair '__vector_pair'
 // CHECK-NEXT: -BuiltinType {{.*}} '__vector_pair'
 
-// CHECK-NO-MMA-NOT: __vector_quad
-// CHECK-NO-MMA: __vector_pair
-
-// CHECK-NO-PAIRED-NOT: __vector_quad
-// CHECK-NO-PAIRED-NOT: __vector_pair
-
-// CHECK-PWR9-NOT: __vector_quad
-// CHECK-PWR9-NOT: __vector_pair
-
 // CHECK-X86_64-NOT: __vector_quad
 // CHECK-X86_64-NOT: __vector_pair
 

diff  --git a/clang/test/CodeGen/ppc-mma-types.c b/clang/test/CodeGen/ppc-mma-types.c
index 777f5e56e9da0..bce930fdc7134 100644
--- a/clang/test/CodeGen/ppc-mma-types.c
+++ b/clang/test/CodeGen/ppc-mma-types.c
@@ -1,5 +1,9 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu future \
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr10 \
+// RUN:   -emit-llvm -O3 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr9 \
+// RUN:   -emit-llvm -O3 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr8 \
 // RUN:   -emit-llvm -O3 -o - %s | FileCheck %s
 
 // CHECK-LABEL: @test1(

diff  --git a/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp b/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp
index 228c6b0740e4c..74e50ceea386b 100644
--- a/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp
+++ b/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp
@@ -1,4 +1,8 @@
-// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu future %s \
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr10 %s \
+// RUN:   -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr9 %s \
+// RUN:   -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr8 %s \
 // RUN:   -emit-llvm -o - | FileCheck %s
 
 // CHECK: _Z2f1Pu13__vector_quad

diff  --git a/clang/test/Sema/ppc-mma-builtins.c b/clang/test/Sema/ppc-mma-builtins.c
new file mode 100644
index 0000000000000..66cb54266f6ca
--- /dev/null
+++ b/clang/test/Sema/ppc-mma-builtins.c
@@ -0,0 +1,33 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
+// RUN:   -target-feature -mma -fsyntax-only %s -verify
+
+void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_pair res;
+  __builtin_vsx_assemble_pair(&res, vc, vc);
+}
+
+void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp);
+}
+
+void test3(const __vector_pair *vpp, signed long offset, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_vsx_lxvp(offset, vpp);
+  __builtin_vsx_stxvp(vp, offset, vp2);
+}
+
+void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __builtin_mma_xxmtacc(&vq); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+  *((__vector_quad *)resp) = vq;
+}
+
+void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __builtin_mma_pmxvf64ger(&vq, vp, vc, 0, 0); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+  *((__vector_quad *)resp) = vq;
+}
+
+

diff  --git a/clang/test/Sema/ppc-paired-vector-builtins.c b/clang/test/Sema/ppc-paired-vector-builtins.c
new file mode 100644
index 0000000000000..67010909256fa
--- /dev/null
+++ b/clang/test/Sema/ppc-paired-vector-builtins.c
@@ -0,0 +1,28 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
+// RUN:   -target-feature -paired-vector-memops -fsyntax-only %s -verify
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \
+// RUN:   -fsyntax-only %s -verify
+
+void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_pair res;
+  __builtin_vsx_assemble_pair(&res, vc, vc); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+}
+
+void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+}
+
+void test3(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_vsx_lxvp(offset, vpp); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+  __builtin_vsx_stxvp(vp, offset, vp2); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+}
+
+void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __builtin_mma_xxmtacc(&vq); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+  *((__vector_quad *)resp) = vq;
+}
+
+

diff  --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index 6e3ea1bfa1200..ee97843beac2b 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -5,6 +5,18 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=BE-PAIRED
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
+; RUN:   | FileCheck %s --check-prefix=LE-PWR9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
+; RUN:   | FileCheck %s --check-prefix=LE-PWR8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64-unknown-linux-gnu < %s \
+; RUN:   | FileCheck %s --check-prefix=BE-PWR9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64-unknown-linux-gnu < %s \
+; RUN:   | FileCheck %s --check-prefix=BE-PWR8
 
 @f = common dso_local local_unnamed_addr global <512 x i1> zeroinitializer, align 16
 @g = common dso_local local_unnamed_addr global <256 x i1> zeroinitializer, align 16
@@ -35,6 +47,78 @@ define dso_local void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PAIRED-NEXT:    stxv vs3, 176(r3)
 ; BE-PAIRED-NEXT:    stxv vs2, 160(r3)
 ; BE-PAIRED-NEXT:    blr
+;
+; LE-PWR9-LABEL: testLdSt:
+; LE-PWR9:       # %bb.0: # %entry
+; LE-PWR9-NEXT:    addis r3, r2, f@toc@ha
+; LE-PWR9-NEXT:    addi r3, r3, f@toc@l
+; LE-PWR9-NEXT:    lxv vs1, 96(r3)
+; LE-PWR9-NEXT:    lxv vs0, 64(r3)
+; LE-PWR9-NEXT:    lxv vs2, 112(r3)
+; LE-PWR9-NEXT:    stxv vs1, 160(r3)
+; LE-PWR9-NEXT:    lxv vs1, 80(r3)
+; LE-PWR9-NEXT:    stxv vs2, 176(r3)
+; LE-PWR9-NEXT:    stxv vs0, 128(r3)
+; LE-PWR9-NEXT:    stxv vs1, 144(r3)
+; LE-PWR9-NEXT:    blr
+;
+; LE-PWR8-LABEL: testLdSt:
+; LE-PWR8:       # %bb.0: # %entry
+; LE-PWR8-NEXT:    addis r3, r2, f@toc@ha
+; LE-PWR8-NEXT:    li r4, 96
+; LE-PWR8-NEXT:    li r5, 112
+; LE-PWR8-NEXT:    addi r3, r3, f@toc@l
+; LE-PWR8-NEXT:    lxvd2x vs0, r3, r4
+; LE-PWR8-NEXT:    li r4, 64
+; LE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; LE-PWR8-NEXT:    li r5, 80
+; LE-PWR8-NEXT:    lxvd2x vs2, r3, r4
+; LE-PWR8-NEXT:    lxvd2x vs3, r3, r5
+; LE-PWR8-NEXT:    li r4, 176
+; LE-PWR8-NEXT:    li r5, 160
+; LE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; LE-PWR8-NEXT:    li r4, 144
+; LE-PWR8-NEXT:    stxvd2x vs0, r3, r5
+; LE-PWR8-NEXT:    li r5, 128
+; LE-PWR8-NEXT:    stxvd2x vs3, r3, r4
+; LE-PWR8-NEXT:    stxvd2x vs2, r3, r5
+; LE-PWR8-NEXT:    blr
+;
+; BE-PWR9-LABEL: testLdSt:
+; BE-PWR9:       # %bb.0: # %entry
+; BE-PWR9-NEXT:    addis r3, r2, f@toc@ha
+; BE-PWR9-NEXT:    addi r3, r3, f@toc@l
+; BE-PWR9-NEXT:    lxv vs1, 96(r3)
+; BE-PWR9-NEXT:    lxv vs0, 64(r3)
+; BE-PWR9-NEXT:    lxv vs2, 112(r3)
+; BE-PWR9-NEXT:    stxv vs1, 160(r3)
+; BE-PWR9-NEXT:    lxv vs1, 80(r3)
+; BE-PWR9-NEXT:    stxv vs2, 176(r3)
+; BE-PWR9-NEXT:    stxv vs0, 128(r3)
+; BE-PWR9-NEXT:    stxv vs1, 144(r3)
+; BE-PWR9-NEXT:    blr
+;
+; BE-PWR8-LABEL: testLdSt:
+; BE-PWR8:       # %bb.0: # %entry
+; BE-PWR8-NEXT:    addis r3, r2, f@toc@ha
+; BE-PWR8-NEXT:    li r4, 96
+; BE-PWR8-NEXT:    li r5, 112
+; BE-PWR8-NEXT:    addi r3, r3, f@toc@l
+; BE-PWR8-NEXT:    lxvd2x vs0, r3, r4
+; BE-PWR8-NEXT:    li r4, 64
+; BE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; BE-PWR8-NEXT:    li r5, 80
+; BE-PWR8-NEXT:    lxvd2x vs2, r3, r4
+; BE-PWR8-NEXT:    lxvd2x vs3, r3, r5
+; BE-PWR8-NEXT:    li r4, 176
+; BE-PWR8-NEXT:    li r5, 160
+; BE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; BE-PWR8-NEXT:    li r4, 144
+; BE-PWR8-NEXT:    stxvd2x vs0, r3, r5
+; BE-PWR8-NEXT:    li r5, 128
+; BE-PWR8-NEXT:    stxvd2x vs3, r3, r4
+; BE-PWR8-NEXT:    stxvd2x vs2, r3, r5
+; BE-PWR8-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds <512 x i1>, <512 x i1>* @f, i64 1
   %0 = load <512 x i1>, <512 x i1>* %arrayidx, align 64
@@ -78,6 +162,84 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PAIRED-NEXT:    stxv vs3, 48(r3)
 ; BE-PAIRED-NEXT:    stxv vs2, 32(r3)
 ; BE-PAIRED-NEXT:    blr
+;
+; LE-PWR9-LABEL: testXLdSt:
+; LE-PWR9:       # %bb.0: # %entry
+; LE-PWR9-NEXT:    addis r5, r2, f@toc@ha
+; LE-PWR9-NEXT:    sldi r3, r3, 6
+; LE-PWR9-NEXT:    addi r5, r5, f@toc@l
+; LE-PWR9-NEXT:    add r6, r5, r3
+; LE-PWR9-NEXT:    lxvx vs3, r5, r3
+; LE-PWR9-NEXT:    sldi r3, r4, 6
+; LE-PWR9-NEXT:    lxv vs0, 16(r6)
+; LE-PWR9-NEXT:    lxv vs1, 32(r6)
+; LE-PWR9-NEXT:    lxv vs2, 48(r6)
+; LE-PWR9-NEXT:    stxvx vs3, r5, r3
+; LE-PWR9-NEXT:    add r3, r5, r3
+; LE-PWR9-NEXT:    stxv vs2, 48(r3)
+; LE-PWR9-NEXT:    stxv vs1, 32(r3)
+; LE-PWR9-NEXT:    stxv vs0, 16(r3)
+; LE-PWR9-NEXT:    blr
+;
+; LE-PWR8-LABEL: testXLdSt:
+; LE-PWR8:       # %bb.0: # %entry
+; LE-PWR8-NEXT:    addis r5, r2, f@toc@ha
+; LE-PWR8-NEXT:    sldi r3, r3, 6
+; LE-PWR8-NEXT:    li r6, 48
+; LE-PWR8-NEXT:    li r8, 16
+; LE-PWR8-NEXT:    li r9, 32
+; LE-PWR8-NEXT:    addi r5, r5, f@toc@l
+; LE-PWR8-NEXT:    add r7, r5, r3
+; LE-PWR8-NEXT:    lxvd2x vs0, r5, r3
+; LE-PWR8-NEXT:    sldi r3, r4, 6
+; LE-PWR8-NEXT:    lxvd2x vs1, r7, r6
+; LE-PWR8-NEXT:    lxvd2x vs2, r7, r8
+; LE-PWR8-NEXT:    add r4, r5, r3
+; LE-PWR8-NEXT:    lxvd2x vs3, r7, r9
+; LE-PWR8-NEXT:    stxvd2x vs0, r5, r3
+; LE-PWR8-NEXT:    stxvd2x vs1, r4, r6
+; LE-PWR8-NEXT:    stxvd2x vs3, r4, r9
+; LE-PWR8-NEXT:    stxvd2x vs2, r4, r8
+; LE-PWR8-NEXT:    blr
+;
+; BE-PWR9-LABEL: testXLdSt:
+; BE-PWR9:       # %bb.0: # %entry
+; BE-PWR9-NEXT:    addis r5, r2, f@toc@ha
+; BE-PWR9-NEXT:    sldi r3, r3, 6
+; BE-PWR9-NEXT:    addi r5, r5, f@toc@l
+; BE-PWR9-NEXT:    add r6, r5, r3
+; BE-PWR9-NEXT:    lxvx vs3, r5, r3
+; BE-PWR9-NEXT:    sldi r3, r4, 6
+; BE-PWR9-NEXT:    lxv vs0, 16(r6)
+; BE-PWR9-NEXT:    lxv vs1, 32(r6)
+; BE-PWR9-NEXT:    lxv vs2, 48(r6)
+; BE-PWR9-NEXT:    stxvx vs3, r5, r3
+; BE-PWR9-NEXT:    add r3, r5, r3
+; BE-PWR9-NEXT:    stxv vs2, 48(r3)
+; BE-PWR9-NEXT:    stxv vs1, 32(r3)
+; BE-PWR9-NEXT:    stxv vs0, 16(r3)
+; BE-PWR9-NEXT:    blr
+;
+; BE-PWR8-LABEL: testXLdSt:
+; BE-PWR8:       # %bb.0: # %entry
+; BE-PWR8-NEXT:    addis r5, r2, f@toc@ha
+; BE-PWR8-NEXT:    sldi r3, r3, 6
+; BE-PWR8-NEXT:    li r6, 32
+; BE-PWR8-NEXT:    li r7, 48
+; BE-PWR8-NEXT:    li r9, 16
+; BE-PWR8-NEXT:    addi r5, r5, f@toc@l
+; BE-PWR8-NEXT:    add r8, r5, r3
+; BE-PWR8-NEXT:    lxvd2x vs2, r5, r3
+; BE-PWR8-NEXT:    sldi r3, r4, 6
+; BE-PWR8-NEXT:    lxvd2x vs0, r8, r6
+; BE-PWR8-NEXT:    lxvd2x vs1, r8, r7
+; BE-PWR8-NEXT:    add r4, r5, r3
+; BE-PWR8-NEXT:    lxvd2x vs3, r8, r9
+; BE-PWR8-NEXT:    stxvd2x vs2, r5, r3
+; BE-PWR8-NEXT:    stxvd2x vs1, r4, r7
+; BE-PWR8-NEXT:    stxvd2x vs0, r4, r6
+; BE-PWR8-NEXT:    stxvd2x vs3, r4, r9
+; BE-PWR8-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds <512 x i1>, <512 x i1>* @f, i64 %SrcIdx
   %0 = load <512 x i1>, <512 x i1>* %arrayidx, align 64
@@ -112,6 +274,94 @@ define dso_local void @testUnalignedLdSt() {
 ; BE-PAIRED-NEXT:    pstxv vs3, 67(r3), 0
 ; BE-PAIRED-NEXT:    pstxv vs2, 51(r3), 0
 ; BE-PAIRED-NEXT:    blr
+;
+; LE-PWR9-LABEL: testUnalignedLdSt:
+; LE-PWR9:       # %bb.0: # %entry
+; LE-PWR9-NEXT:    addis r3, r2, f@toc@ha
+; LE-PWR9-NEXT:    li r4, 11
+; LE-PWR9-NEXT:    addi r3, r3, f@toc@l
+; LE-PWR9-NEXT:    lxvx vs0, r3, r4
+; LE-PWR9-NEXT:    li r4, 27
+; LE-PWR9-NEXT:    lxvx vs1, r3, r4
+; LE-PWR9-NEXT:    li r4, 43
+; LE-PWR9-NEXT:    lxvx vs2, r3, r4
+; LE-PWR9-NEXT:    li r4, 59
+; LE-PWR9-NEXT:    lxvx vs3, r3, r4
+; LE-PWR9-NEXT:    li r4, 67
+; LE-PWR9-NEXT:    stxvx vs3, r3, r4
+; LE-PWR9-NEXT:    li r4, 51
+; LE-PWR9-NEXT:    stxvx vs2, r3, r4
+; LE-PWR9-NEXT:    li r4, 35
+; LE-PWR9-NEXT:    stxvx vs1, r3, r4
+; LE-PWR9-NEXT:    li r4, 19
+; LE-PWR9-NEXT:    stxvx vs0, r3, r4
+; LE-PWR9-NEXT:    blr
+;
+; LE-PWR8-LABEL: testUnalignedLdSt:
+; LE-PWR8:       # %bb.0: # %entry
+; LE-PWR8-NEXT:    addis r3, r2, f@toc@ha
+; LE-PWR8-NEXT:    li r4, 59
+; LE-PWR8-NEXT:    li r5, 43
+; LE-PWR8-NEXT:    addi r3, r3, f@toc@l
+; LE-PWR8-NEXT:    lxvd2x vs0, r3, r4
+; LE-PWR8-NEXT:    li r4, 11
+; LE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; LE-PWR8-NEXT:    li r5, 27
+; LE-PWR8-NEXT:    lxvd2x vs2, r3, r4
+; LE-PWR8-NEXT:    lxvd2x vs3, r3, r5
+; LE-PWR8-NEXT:    li r4, 51
+; LE-PWR8-NEXT:    li r5, 67
+; LE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; LE-PWR8-NEXT:    li r4, 35
+; LE-PWR8-NEXT:    stxvd2x vs0, r3, r5
+; LE-PWR8-NEXT:    li r5, 19
+; LE-PWR8-NEXT:    stxvd2x vs3, r3, r4
+; LE-PWR8-NEXT:    stxvd2x vs2, r3, r5
+; LE-PWR8-NEXT:    blr
+;
+; BE-PWR9-LABEL: testUnalignedLdSt:
+; BE-PWR9:       # %bb.0: # %entry
+; BE-PWR9-NEXT:    addis r3, r2, f@toc@ha
+; BE-PWR9-NEXT:    li r4, 11
+; BE-PWR9-NEXT:    addi r3, r3, f@toc@l
+; BE-PWR9-NEXT:    lxvx vs0, r3, r4
+; BE-PWR9-NEXT:    li r4, 27
+; BE-PWR9-NEXT:    lxvx vs1, r3, r4
+; BE-PWR9-NEXT:    li r4, 43
+; BE-PWR9-NEXT:    lxvx vs2, r3, r4
+; BE-PWR9-NEXT:    li r4, 59
+; BE-PWR9-NEXT:    lxvx vs3, r3, r4
+; BE-PWR9-NEXT:    li r4, 67
+; BE-PWR9-NEXT:    stxvx vs3, r3, r4
+; BE-PWR9-NEXT:    li r4, 51
+; BE-PWR9-NEXT:    stxvx vs2, r3, r4
+; BE-PWR9-NEXT:    li r4, 35
+; BE-PWR9-NEXT:    stxvx vs1, r3, r4
+; BE-PWR9-NEXT:    li r4, 19
+; BE-PWR9-NEXT:    stxvx vs0, r3, r4
+; BE-PWR9-NEXT:    blr
+;
+; BE-PWR8-LABEL: testUnalignedLdSt:
+; BE-PWR8:       # %bb.0: # %entry
+; BE-PWR8-NEXT:    addis r3, r2, f@toc@ha
+; BE-PWR8-NEXT:    li r4, 43
+; BE-PWR8-NEXT:    li r5, 59
+; BE-PWR8-NEXT:    addi r3, r3, f@toc@l
+; BE-PWR8-NEXT:    lxvd2x vs0, r3, r4
+; BE-PWR8-NEXT:    li r4, 11
+; BE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; BE-PWR8-NEXT:    li r5, 27
+; BE-PWR8-NEXT:    lxvd2x vs2, r3, r4
+; BE-PWR8-NEXT:    lxvd2x vs3, r3, r5
+; BE-PWR8-NEXT:    li r4, 67
+; BE-PWR8-NEXT:    li r5, 51
+; BE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; BE-PWR8-NEXT:    li r4, 35
+; BE-PWR8-NEXT:    stxvd2x vs0, r3, r5
+; BE-PWR8-NEXT:    li r5, 19
+; BE-PWR8-NEXT:    stxvd2x vs3, r3, r4
+; BE-PWR8-NEXT:    stxvd2x vs2, r3, r5
+; BE-PWR8-NEXT:    blr
 entry:
   %0 = bitcast <512 x i1>* @f to i8*
   %add.ptr = getelementptr inbounds i8, i8* %0, i64 11
@@ -141,6 +391,54 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PAIRED-NEXT:    stxv v3, 80(r3)
 ; BE-PAIRED-NEXT:    stxv v2, 64(r3)
 ; BE-PAIRED-NEXT:    blr
+;
+; LE-PWR9-LABEL: testLdStPair:
+; LE-PWR9:       # %bb.0: # %entry
+; LE-PWR9-NEXT:    addis r3, r2, g@toc@ha
+; LE-PWR9-NEXT:    addi r3, r3, g@toc@l
+; LE-PWR9-NEXT:    lxv vs0, 32(r3)
+; LE-PWR9-NEXT:    lxv vs1, 48(r3)
+; LE-PWR9-NEXT:    stxv vs1, 80(r3)
+; LE-PWR9-NEXT:    stxv vs0, 64(r3)
+; LE-PWR9-NEXT:    blr
+;
+; LE-PWR8-LABEL: testLdStPair:
+; LE-PWR8:       # %bb.0: # %entry
+; LE-PWR8-NEXT:    addis r3, r2, g@toc@ha
+; LE-PWR8-NEXT:    li r4, 32
+; LE-PWR8-NEXT:    li r5, 48
+; LE-PWR8-NEXT:    addi r3, r3, g@toc@l
+; LE-PWR8-NEXT:    lxvd2x vs0, r3, r4
+; LE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; LE-PWR8-NEXT:    li r4, 80
+; LE-PWR8-NEXT:    li r5, 64
+; LE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; LE-PWR8-NEXT:    stxvd2x vs0, r3, r5
+; LE-PWR8-NEXT:    blr
+;
+; BE-PWR9-LABEL: testLdStPair:
+; BE-PWR9:       # %bb.0: # %entry
+; BE-PWR9-NEXT:    addis r3, r2, g@toc@ha
+; BE-PWR9-NEXT:    addi r3, r3, g@toc@l
+; BE-PWR9-NEXT:    lxv vs0, 32(r3)
+; BE-PWR9-NEXT:    lxv vs1, 48(r3)
+; BE-PWR9-NEXT:    stxv vs1, 80(r3)
+; BE-PWR9-NEXT:    stxv vs0, 64(r3)
+; BE-PWR9-NEXT:    blr
+;
+; BE-PWR8-LABEL: testLdStPair:
+; BE-PWR8:       # %bb.0: # %entry
+; BE-PWR8-NEXT:    addis r3, r2, g@toc@ha
+; BE-PWR8-NEXT:    li r4, 32
+; BE-PWR8-NEXT:    li r5, 48
+; BE-PWR8-NEXT:    addi r3, r3, g@toc@l
+; BE-PWR8-NEXT:    lxvd2x vs0, r3, r4
+; BE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; BE-PWR8-NEXT:    li r4, 80
+; BE-PWR8-NEXT:    li r5, 64
+; BE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; BE-PWR8-NEXT:    stxvd2x vs0, r3, r5
+; BE-PWR8-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds <256 x i1>, <256 x i1>* @g, i64 1
   %0 = load <256 x i1>, <256 x i1>* %arrayidx, align 64
@@ -176,6 +474,64 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PAIRED-NEXT:    stxvx v2, r5, r3
 ; BE-PAIRED-NEXT:    stxv v3, 16(r4)
 ; BE-PAIRED-NEXT:    blr
+;
+; LE-PWR9-LABEL: testXLdStPair:
+; LE-PWR9:       # %bb.0: # %entry
+; LE-PWR9-NEXT:    addis r5, r2, g@toc@ha
+; LE-PWR9-NEXT:    sldi r3, r3, 5
+; LE-PWR9-NEXT:    sldi r4, r4, 5
+; LE-PWR9-NEXT:    addi r5, r5, g@toc@l
+; LE-PWR9-NEXT:    add r6, r5, r3
+; LE-PWR9-NEXT:    lxvx vs1, r5, r3
+; LE-PWR9-NEXT:    lxv vs0, 16(r6)
+; LE-PWR9-NEXT:    add r6, r5, r4
+; LE-PWR9-NEXT:    stxvx vs1, r5, r4
+; LE-PWR9-NEXT:    stxv vs0, 16(r6)
+; LE-PWR9-NEXT:    blr
+;
+; LE-PWR8-LABEL: testXLdStPair:
+; LE-PWR8:       # %bb.0: # %entry
+; LE-PWR8-NEXT:    addis r5, r2, g@toc@ha
+; LE-PWR8-NEXT:    sldi r3, r3, 5
+; LE-PWR8-NEXT:    li r7, 16
+; LE-PWR8-NEXT:    addi r5, r5, g@toc@l
+; LE-PWR8-NEXT:    add r6, r5, r3
+; LE-PWR8-NEXT:    lxvd2x vs1, r5, r3
+; LE-PWR8-NEXT:    sldi r3, r4, 5
+; LE-PWR8-NEXT:    lxvd2x vs0, r6, r7
+; LE-PWR8-NEXT:    add r4, r5, r3
+; LE-PWR8-NEXT:    stxvd2x vs1, r5, r3
+; LE-PWR8-NEXT:    stxvd2x vs0, r4, r7
+; LE-PWR8-NEXT:    blr
+;
+; BE-PWR9-LABEL: testXLdStPair:
+; BE-PWR9:       # %bb.0: # %entry
+; BE-PWR9-NEXT:    addis r5, r2, g@toc@ha
+; BE-PWR9-NEXT:    sldi r3, r3, 5
+; BE-PWR9-NEXT:    sldi r4, r4, 5
+; BE-PWR9-NEXT:    addi r5, r5, g@toc@l
+; BE-PWR9-NEXT:    add r6, r5, r3
+; BE-PWR9-NEXT:    lxvx vs1, r5, r3
+; BE-PWR9-NEXT:    lxv vs0, 16(r6)
+; BE-PWR9-NEXT:    add r6, r5, r4
+; BE-PWR9-NEXT:    stxvx vs1, r5, r4
+; BE-PWR9-NEXT:    stxv vs0, 16(r6)
+; BE-PWR9-NEXT:    blr
+;
+; BE-PWR8-LABEL: testXLdStPair:
+; BE-PWR8:       # %bb.0: # %entry
+; BE-PWR8-NEXT:    addis r5, r2, g@toc@ha
+; BE-PWR8-NEXT:    sldi r3, r3, 5
+; BE-PWR8-NEXT:    li r7, 16
+; BE-PWR8-NEXT:    addi r5, r5, g@toc@l
+; BE-PWR8-NEXT:    add r6, r5, r3
+; BE-PWR8-NEXT:    lxvd2x vs0, r5, r3
+; BE-PWR8-NEXT:    sldi r3, r4, 5
+; BE-PWR8-NEXT:    lxvd2x vs1, r6, r7
+; BE-PWR8-NEXT:    add r4, r5, r3
+; BE-PWR8-NEXT:    stxvd2x vs0, r5, r3
+; BE-PWR8-NEXT:    stxvd2x vs1, r4, r7
+; BE-PWR8-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds <256 x i1>, <256 x i1>* @g, i64 %SrcIdx
   %0 = load <256 x i1>, <256 x i1>* %arrayidx, align 64
@@ -202,6 +558,74 @@ define dso_local void @testUnalignedLdStPair() {
 ; BE-PAIRED-NEXT:    pstxv v3, 35(r3), 0
 ; BE-PAIRED-NEXT:    pstxv v2, 19(r3), 0
 ; BE-PAIRED-NEXT:    blr
+;
+; LE-PWR9-LABEL: testUnalignedLdStPair:
+; LE-PWR9:       # %bb.0: # %entry
+; LE-PWR9-NEXT:    addis r3, r2, g@toc@ha
+; LE-PWR9-NEXT:    li r6, 19
+; LE-PWR9-NEXT:    li r4, 11
+; LE-PWR9-NEXT:    li r5, 35
+; LE-PWR9-NEXT:    li r7, 27
+; LE-PWR9-NEXT:    addi r3, r3, g@toc@l
+; LE-PWR9-NEXT:    lxvx vs0, r3, r6
+; LE-PWR9-NEXT:    ldx r4, r3, r4
+; LE-PWR9-NEXT:    ldx r5, r3, r5
+; LE-PWR9-NEXT:    stdx r4, r3, r6
+; LE-PWR9-NEXT:    stxvx vs0, r3, r7
+; LE-PWR9-NEXT:    li r7, 43
+; LE-PWR9-NEXT:    stdx r5, r3, r7
+; LE-PWR9-NEXT:    blr
+;
+; LE-PWR8-LABEL: testUnalignedLdStPair:
+; LE-PWR8:       # %bb.0: # %entry
+; LE-PWR8-NEXT:    addis r3, r2, g@toc@ha
+; LE-PWR8-NEXT:    li r4, 19
+; LE-PWR8-NEXT:    li r5, 11
+; LE-PWR8-NEXT:    li r6, 35
+; LE-PWR8-NEXT:    li r7, 43
+; LE-PWR8-NEXT:    li r8, 27
+; LE-PWR8-NEXT:    addi r3, r3, g@toc@l
+; LE-PWR8-NEXT:    lxvd2x vs0, r3, r4
+; LE-PWR8-NEXT:    ldx r5, r3, r5
+; LE-PWR8-NEXT:    ldx r6, r3, r6
+; LE-PWR8-NEXT:    stdx r6, r3, r7
+; LE-PWR8-NEXT:    stdx r5, r3, r4
+; LE-PWR8-NEXT:    stxvd2x vs0, r3, r8
+; LE-PWR8-NEXT:    blr
+;
+; BE-PWR9-LABEL: testUnalignedLdStPair:
+; BE-PWR9:       # %bb.0: # %entry
+; BE-PWR9-NEXT:    addis r3, r2, g@toc@ha
+; BE-PWR9-NEXT:    li r6, 19
+; BE-PWR9-NEXT:    li r4, 11
+; BE-PWR9-NEXT:    li r5, 35
+; BE-PWR9-NEXT:    li r7, 27
+; BE-PWR9-NEXT:    addi r3, r3, g@toc@l
+; BE-PWR9-NEXT:    lxvx vs0, r3, r6
+; BE-PWR9-NEXT:    ldx r4, r3, r4
+; BE-PWR9-NEXT:    ldx r5, r3, r5
+; BE-PWR9-NEXT:    stdx r4, r3, r6
+; BE-PWR9-NEXT:    stxvx vs0, r3, r7
+; BE-PWR9-NEXT:    li r7, 43
+; BE-PWR9-NEXT:    stdx r5, r3, r7
+; BE-PWR9-NEXT:    blr
+;
+; BE-PWR8-LABEL: testUnalignedLdStPair:
+; BE-PWR8:       # %bb.0: # %entry
+; BE-PWR8-NEXT:    addis r3, r2, g@toc@ha
+; BE-PWR8-NEXT:    li r4, 19
+; BE-PWR8-NEXT:    li r5, 11
+; BE-PWR8-NEXT:    li r6, 35
+; BE-PWR8-NEXT:    li r7, 27
+; BE-PWR8-NEXT:    addi r3, r3, g@toc@l
+; BE-PWR8-NEXT:    lxvd2x vs0, r3, r4
+; BE-PWR8-NEXT:    ldx r5, r3, r5
+; BE-PWR8-NEXT:    ldx r6, r3, r6
+; BE-PWR8-NEXT:    stxvd2x vs0, r3, r7
+; BE-PWR8-NEXT:    li r7, 43
+; BE-PWR8-NEXT:    stdx r5, r3, r4
+; BE-PWR8-NEXT:    stdx r6, r3, r7
+; BE-PWR8-NEXT:    blr
 entry:
   %0 = bitcast <256 x i1>* @g to i8*
   %add.ptr = getelementptr inbounds i8, i8* %0, i64 11


        


More information about the cfe-commits mailing list