[clang] 8737c74 - [PowerPC][MMA] Allow MMA builtin types in pre-P10 compilation units
Kamau Bridgeman via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 5 05:59:39 PDT 2021
Author: Kamau Bridgeman
Date: 2021-10-05T07:59:32-05:00
New Revision: 8737c74fab3aee833d85b7d235d2c47ebb4eed2e
URL: https://github.com/llvm/llvm-project/commit/8737c74fab3aee833d85b7d235d2c47ebb4eed2e
DIFF: https://github.com/llvm/llvm-project/commit/8737c74fab3aee833d85b7d235d2c47ebb4eed2e.diff
LOG: [PowerPC][MMA] Allow MMA builtin types in pre-P10 compilation units
This patch allows the use of __vector_quad and __vector_pair, PPC MMA builtin
types, on all PowerPC 64-bit compilation units. When these types are
made available the builtins that use them automatically become available
so semantic checking for mma and pair vector memop __builtins is also
expanded to ensure these builtin function calls are only allowed on
Power10 and newer architectures. All related test cases are updated to
ensure test coverage.
Reviewed By: #powerpc, nemanjai
Differential Revision: https://reviews.llvm.org/D109599
Added:
clang/test/Sema/ppc-mma-builtins.c
clang/test/Sema/ppc-paired-vector-builtins.c
Modified:
clang/include/clang/Sema/Sema.h
clang/lib/AST/ASTContext.cpp
clang/lib/Sema/Sema.cpp
clang/lib/Sema/SemaChecking.cpp
clang/test/AST/ast-dump-ppc-types.c
clang/test/CodeGen/ppc-mma-types.c
clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp
llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a85e53a9a69e8..0a68f6f71b8e7 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12705,7 +12705,8 @@ class Sema final {
int ArgNum, unsigned ExpectedFieldNum,
bool AllowName);
bool SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall);
- bool SemaBuiltinPPCMMACall(CallExpr *TheCall, const char *TypeDesc);
+ bool SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID,
+ const char *TypeDesc);
bool CheckPPCMMAType(QualType Type, SourceLocation TypeLoc);
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index e2ebe737fdfdc..d1fd3ce061415 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1444,13 +1444,10 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
#include "clang/Basic/AArch64SVEACLETypes.def"
}
- if (Target.getTriple().isPPC64() &&
- Target.hasFeature("paired-vector-memops")) {
- if (Target.hasFeature("mma")) {
+ if (Target.getTriple().isPPC64()) {
#define PPC_VECTOR_MMA_TYPE(Name, Id, Size) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/PPCTypes.def"
- }
#define PPC_VECTOR_VSX_TYPE(Name, Id, Size) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/PPCTypes.def"
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index d260a45867e06..cf8dcbb6fc3ef 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -403,13 +403,10 @@ void Sema::Initialize() {
#include "clang/Basic/AArch64SVEACLETypes.def"
}
- if (Context.getTargetInfo().getTriple().isPPC64() &&
- Context.getTargetInfo().hasFeature("paired-vector-memops")) {
- if (Context.getTargetInfo().hasFeature("mma")) {
+ if (Context.getTargetInfo().getTriple().isPPC64()) {
#define PPC_VECTOR_MMA_TYPE(Name, Id, Size) \
addImplicitTypedef(#Name, Context.Id##Ty);
#include "clang/Basic/PPCTypes.def"
- }
#define PPC_VECTOR_VSX_TYPE(Name, Id, Size) \
addImplicitTypedef(#Name, Context.Id##Ty);
#include "clang/Basic/PPCTypes.def"
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a6d26ac65465d..0ee05c9f09a52 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3521,9 +3521,9 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case PPC::BI__builtin_ppc_store8r:
return SemaFeatureCheck(*this, TheCall, "isa-v206-instructions",
diag::err_ppc_builtin_only_on_arch, "7");
-#define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
- case PPC::BI__builtin_##Name: \
- return SemaBuiltinPPCMMACall(TheCall, Types);
+#define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
+ case PPC::BI__builtin_##Name: \
+ return SemaBuiltinPPCMMACall(TheCall, BuiltinID, Types);
#include "clang/Basic/BuiltinsPPC.def"
}
return SemaBuiltinConstantArgRange(TheCall, i, l, u);
@@ -7481,11 +7481,35 @@ bool Sema::SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
/// Emit an error and return true on failure; return false on success.
/// TypeStr is a string containing the type descriptor of the value returned by
/// the builtin and the descriptors of the expected type of the arguments.
-bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, const char *TypeStr) {
+bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID,
+ const char *TypeStr) {
assert((TypeStr[0] != '\0') &&
"Invalid types in PPC MMA builtin declaration");
+ switch (BuiltinID) {
+ default:
+ // This function is called in CheckPPCBuiltinFunctionCall where the
+ // BuiltinID is guaranteed to be an MMA or pair vector memop builtin. Here
+ // we are isolating the pair vector memop builtins that can be used with mma
+ // off, so the default case is every builtin that requires both mma and
+ // paired vector memops.
+ if (SemaFeatureCheck(*this, TheCall, "paired-vector-memops",
+ diag::err_ppc_builtin_only_on_arch, "10") ||
+ SemaFeatureCheck(*this, TheCall, "mma",
+ diag::err_ppc_builtin_only_on_arch, "10"))
+ return true;
+ break;
+ case PPC::BI__builtin_vsx_lxvp:
+ case PPC::BI__builtin_vsx_stxvp:
+ case PPC::BI__builtin_vsx_assemble_pair:
+ case PPC::BI__builtin_vsx_disassemble_pair:
+ if (SemaFeatureCheck(*this, TheCall, "paired-vector-memops",
+ diag::err_ppc_builtin_only_on_arch, "10"))
+ return true;
+ break;
+ }
+
unsigned Mask = 0;
unsigned ArgNum = 0;
diff --git a/clang/test/AST/ast-dump-ppc-types.c b/clang/test/AST/ast-dump-ppc-types.c
index 013f935376a6f..26ae5441f20d7 100644
--- a/clang/test/AST/ast-dump-ppc-types.c
+++ b/clang/test/AST/ast-dump-ppc-types.c
@@ -1,13 +1,9 @@
-// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
// RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s
-// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
-// RUN: -target-feature -mma -ast-dump %s | FileCheck %s \
-// RUN: --check-prefix=CHECK-NO-MMA
-// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
-// RUN: -target-feature -paired-vector-memops -ast-dump %s | FileCheck %s \
-// RUN: --check-prefix=CHECK-NO-PAIRED
// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \
-// RUN: -ast-dump %s | FileCheck %s --check-prefix=CHECK-PWR9
+// RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr8 \
+// RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -ast-dump %s | FileCheck %s \
// RUN: --check-prefix=CHECK-X86_64
// RUN: %clang_cc1 -triple arm-unknown-unknown -ast-dump %s | FileCheck %s \
@@ -24,15 +20,6 @@
// CHECK: TypedefDecl {{.*}} implicit __vector_pair '__vector_pair'
// CHECK-NEXT: -BuiltinType {{.*}} '__vector_pair'
-// CHECK-NO-MMA-NOT: __vector_quad
-// CHECK-NO-MMA: __vector_pair
-
-// CHECK-NO-PAIRED-NOT: __vector_quad
-// CHECK-NO-PAIRED-NOT: __vector_pair
-
-// CHECK-PWR9-NOT: __vector_quad
-// CHECK-PWR9-NOT: __vector_pair
-
// CHECK-X86_64-NOT: __vector_quad
// CHECK-X86_64-NOT: __vector_pair
diff --git a/clang/test/CodeGen/ppc-mma-types.c b/clang/test/CodeGen/ppc-mma-types.c
index 777f5e56e9da0..bce930fdc7134 100644
--- a/clang/test/CodeGen/ppc-mma-types.c
+++ b/clang/test/CodeGen/ppc-mma-types.c
@@ -1,5 +1,9 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu future \
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr10 \
+// RUN: -emit-llvm -O3 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr9 \
+// RUN: -emit-llvm -O3 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr8 \
// RUN: -emit-llvm -O3 -o - %s | FileCheck %s
// CHECK-LABEL: @test1(
diff --git a/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp b/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp
index 228c6b0740e4c..74e50ceea386b 100644
--- a/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp
+++ b/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp
@@ -1,4 +1,8 @@
-// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu future %s \
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr10 %s \
+// RUN: -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr9 %s \
+// RUN: -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr8 %s \
// RUN: -emit-llvm -o - | FileCheck %s
// CHECK: _Z2f1Pu13__vector_quad
diff --git a/clang/test/Sema/ppc-mma-builtins.c b/clang/test/Sema/ppc-mma-builtins.c
new file mode 100644
index 0000000000000..66cb54266f6ca
--- /dev/null
+++ b/clang/test/Sema/ppc-mma-builtins.c
@@ -0,0 +1,33 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
+// RUN: -target-feature -mma -fsyntax-only %s -verify
+
+void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __vector_pair res;
+ __builtin_vsx_assemble_pair(&res, vc, vc);
+}
+
+void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp);
+}
+
+void test3(const __vector_pair *vpp, signed long offset, const __vector_pair *vp2) {
+ __vector_pair vp = __builtin_vsx_lxvp(offset, vpp);
+ __builtin_vsx_stxvp(vp, offset, vp2);
+}
+
+void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __vector_quad vq = *((__vector_quad *)vqp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_xxmtacc(&vq); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+ *((__vector_quad *)resp) = vq;
+}
+
+void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __vector_quad vq = *((__vector_quad *)vqp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_pmxvf64ger(&vq, vp, vc, 0, 0); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+ *((__vector_quad *)resp) = vq;
+}
+
+
diff --git a/clang/test/Sema/ppc-paired-vector-builtins.c b/clang/test/Sema/ppc-paired-vector-builtins.c
new file mode 100644
index 0000000000000..67010909256fa
--- /dev/null
+++ b/clang/test/Sema/ppc-paired-vector-builtins.c
@@ -0,0 +1,28 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
+// RUN: -target-feature -paired-vector-memops -fsyntax-only %s -verify
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \
+// RUN: -fsyntax-only %s -verify
+
+void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __vector_pair res;
+ __builtin_vsx_assemble_pair(&res, vc, vc); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+}
+
+void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+}
+
+void test3(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) {
+ __vector_pair vp = __builtin_vsx_lxvp(offset, vpp); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+ __builtin_vsx_stxvp(vp, offset, vp2); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+}
+
+void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __vector_quad vq = *((__vector_quad *)vqp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_xxmtacc(&vq); // expected-error {{this builtin is only valid on POWER10 or later CPUs}}
+ *((__vector_quad *)resp) = vq;
+}
+
+
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index 6e3ea1bfa1200..ee97843beac2b 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -5,6 +5,18 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=BE-PAIRED
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
+; RUN: | FileCheck %s --check-prefix=LE-PWR9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
+; RUN: | FileCheck %s --check-prefix=LE-PWR8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-unknown-linux-gnu < %s \
+; RUN: | FileCheck %s --check-prefix=BE-PWR9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-unknown-linux-gnu < %s \
+; RUN: | FileCheck %s --check-prefix=BE-PWR8
@f = common dso_local local_unnamed_addr global <512 x i1> zeroinitializer, align 16
@g = common dso_local local_unnamed_addr global <256 x i1> zeroinitializer, align 16
@@ -35,6 +47,78 @@ define dso_local void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: stxv vs3, 176(r3)
; BE-PAIRED-NEXT: stxv vs2, 160(r3)
; BE-PAIRED-NEXT: blr
+;
+; LE-PWR9-LABEL: testLdSt:
+; LE-PWR9: # %bb.0: # %entry
+; LE-PWR9-NEXT: addis r3, r2, f@toc@ha
+; LE-PWR9-NEXT: addi r3, r3, f@toc@l
+; LE-PWR9-NEXT: lxv vs1, 96(r3)
+; LE-PWR9-NEXT: lxv vs0, 64(r3)
+; LE-PWR9-NEXT: lxv vs2, 112(r3)
+; LE-PWR9-NEXT: stxv vs1, 160(r3)
+; LE-PWR9-NEXT: lxv vs1, 80(r3)
+; LE-PWR9-NEXT: stxv vs2, 176(r3)
+; LE-PWR9-NEXT: stxv vs0, 128(r3)
+; LE-PWR9-NEXT: stxv vs1, 144(r3)
+; LE-PWR9-NEXT: blr
+;
+; LE-PWR8-LABEL: testLdSt:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: addis r3, r2, f@toc@ha
+; LE-PWR8-NEXT: li r4, 96
+; LE-PWR8-NEXT: li r5, 112
+; LE-PWR8-NEXT: addi r3, r3, f@toc@l
+; LE-PWR8-NEXT: lxvd2x vs0, r3, r4
+; LE-PWR8-NEXT: li r4, 64
+; LE-PWR8-NEXT: lxvd2x vs1, r3, r5
+; LE-PWR8-NEXT: li r5, 80
+; LE-PWR8-NEXT: lxvd2x vs2, r3, r4
+; LE-PWR8-NEXT: lxvd2x vs3, r3, r5
+; LE-PWR8-NEXT: li r4, 176
+; LE-PWR8-NEXT: li r5, 160
+; LE-PWR8-NEXT: stxvd2x vs1, r3, r4
+; LE-PWR8-NEXT: li r4, 144
+; LE-PWR8-NEXT: stxvd2x vs0, r3, r5
+; LE-PWR8-NEXT: li r5, 128
+; LE-PWR8-NEXT: stxvd2x vs3, r3, r4
+; LE-PWR8-NEXT: stxvd2x vs2, r3, r5
+; LE-PWR8-NEXT: blr
+;
+; BE-PWR9-LABEL: testLdSt:
+; BE-PWR9: # %bb.0: # %entry
+; BE-PWR9-NEXT: addis r3, r2, f@toc@ha
+; BE-PWR9-NEXT: addi r3, r3, f@toc@l
+; BE-PWR9-NEXT: lxv vs1, 96(r3)
+; BE-PWR9-NEXT: lxv vs0, 64(r3)
+; BE-PWR9-NEXT: lxv vs2, 112(r3)
+; BE-PWR9-NEXT: stxv vs1, 160(r3)
+; BE-PWR9-NEXT: lxv vs1, 80(r3)
+; BE-PWR9-NEXT: stxv vs2, 176(r3)
+; BE-PWR9-NEXT: stxv vs0, 128(r3)
+; BE-PWR9-NEXT: stxv vs1, 144(r3)
+; BE-PWR9-NEXT: blr
+;
+; BE-PWR8-LABEL: testLdSt:
+; BE-PWR8: # %bb.0: # %entry
+; BE-PWR8-NEXT: addis r3, r2, f@toc@ha
+; BE-PWR8-NEXT: li r4, 96
+; BE-PWR8-NEXT: li r5, 112
+; BE-PWR8-NEXT: addi r3, r3, f@toc@l
+; BE-PWR8-NEXT: lxvd2x vs0, r3, r4
+; BE-PWR8-NEXT: li r4, 64
+; BE-PWR8-NEXT: lxvd2x vs1, r3, r5
+; BE-PWR8-NEXT: li r5, 80
+; BE-PWR8-NEXT: lxvd2x vs2, r3, r4
+; BE-PWR8-NEXT: lxvd2x vs3, r3, r5
+; BE-PWR8-NEXT: li r4, 176
+; BE-PWR8-NEXT: li r5, 160
+; BE-PWR8-NEXT: stxvd2x vs1, r3, r4
+; BE-PWR8-NEXT: li r4, 144
+; BE-PWR8-NEXT: stxvd2x vs0, r3, r5
+; BE-PWR8-NEXT: li r5, 128
+; BE-PWR8-NEXT: stxvd2x vs3, r3, r4
+; BE-PWR8-NEXT: stxvd2x vs2, r3, r5
+; BE-PWR8-NEXT: blr
entry:
%arrayidx = getelementptr inbounds <512 x i1>, <512 x i1>* @f, i64 1
%0 = load <512 x i1>, <512 x i1>* %arrayidx, align 64
@@ -78,6 +162,84 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: stxv vs3, 48(r3)
; BE-PAIRED-NEXT: stxv vs2, 32(r3)
; BE-PAIRED-NEXT: blr
+;
+; LE-PWR9-LABEL: testXLdSt:
+; LE-PWR9: # %bb.0: # %entry
+; LE-PWR9-NEXT: addis r5, r2, f@toc@ha
+; LE-PWR9-NEXT: sldi r3, r3, 6
+; LE-PWR9-NEXT: addi r5, r5, f@toc@l
+; LE-PWR9-NEXT: add r6, r5, r3
+; LE-PWR9-NEXT: lxvx vs3, r5, r3
+; LE-PWR9-NEXT: sldi r3, r4, 6
+; LE-PWR9-NEXT: lxv vs0, 16(r6)
+; LE-PWR9-NEXT: lxv vs1, 32(r6)
+; LE-PWR9-NEXT: lxv vs2, 48(r6)
+; LE-PWR9-NEXT: stxvx vs3, r5, r3
+; LE-PWR9-NEXT: add r3, r5, r3
+; LE-PWR9-NEXT: stxv vs2, 48(r3)
+; LE-PWR9-NEXT: stxv vs1, 32(r3)
+; LE-PWR9-NEXT: stxv vs0, 16(r3)
+; LE-PWR9-NEXT: blr
+;
+; LE-PWR8-LABEL: testXLdSt:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: addis r5, r2, f@toc@ha
+; LE-PWR8-NEXT: sldi r3, r3, 6
+; LE-PWR8-NEXT: li r6, 48
+; LE-PWR8-NEXT: li r8, 16
+; LE-PWR8-NEXT: li r9, 32
+; LE-PWR8-NEXT: addi r5, r5, f@toc@l
+; LE-PWR8-NEXT: add r7, r5, r3
+; LE-PWR8-NEXT: lxvd2x vs0, r5, r3
+; LE-PWR8-NEXT: sldi r3, r4, 6
+; LE-PWR8-NEXT: lxvd2x vs1, r7, r6
+; LE-PWR8-NEXT: lxvd2x vs2, r7, r8
+; LE-PWR8-NEXT: add r4, r5, r3
+; LE-PWR8-NEXT: lxvd2x vs3, r7, r9
+; LE-PWR8-NEXT: stxvd2x vs0, r5, r3
+; LE-PWR8-NEXT: stxvd2x vs1, r4, r6
+; LE-PWR8-NEXT: stxvd2x vs3, r4, r9
+; LE-PWR8-NEXT: stxvd2x vs2, r4, r8
+; LE-PWR8-NEXT: blr
+;
+; BE-PWR9-LABEL: testXLdSt:
+; BE-PWR9: # %bb.0: # %entry
+; BE-PWR9-NEXT: addis r5, r2, f@toc@ha
+; BE-PWR9-NEXT: sldi r3, r3, 6
+; BE-PWR9-NEXT: addi r5, r5, f@toc@l
+; BE-PWR9-NEXT: add r6, r5, r3
+; BE-PWR9-NEXT: lxvx vs3, r5, r3
+; BE-PWR9-NEXT: sldi r3, r4, 6
+; BE-PWR9-NEXT: lxv vs0, 16(r6)
+; BE-PWR9-NEXT: lxv vs1, 32(r6)
+; BE-PWR9-NEXT: lxv vs2, 48(r6)
+; BE-PWR9-NEXT: stxvx vs3, r5, r3
+; BE-PWR9-NEXT: add r3, r5, r3
+; BE-PWR9-NEXT: stxv vs2, 48(r3)
+; BE-PWR9-NEXT: stxv vs1, 32(r3)
+; BE-PWR9-NEXT: stxv vs0, 16(r3)
+; BE-PWR9-NEXT: blr
+;
+; BE-PWR8-LABEL: testXLdSt:
+; BE-PWR8: # %bb.0: # %entry
+; BE-PWR8-NEXT: addis r5, r2, f@toc@ha
+; BE-PWR8-NEXT: sldi r3, r3, 6
+; BE-PWR8-NEXT: li r6, 32
+; BE-PWR8-NEXT: li r7, 48
+; BE-PWR8-NEXT: li r9, 16
+; BE-PWR8-NEXT: addi r5, r5, f@toc@l
+; BE-PWR8-NEXT: add r8, r5, r3
+; BE-PWR8-NEXT: lxvd2x vs2, r5, r3
+; BE-PWR8-NEXT: sldi r3, r4, 6
+; BE-PWR8-NEXT: lxvd2x vs0, r8, r6
+; BE-PWR8-NEXT: lxvd2x vs1, r8, r7
+; BE-PWR8-NEXT: add r4, r5, r3
+; BE-PWR8-NEXT: lxvd2x vs3, r8, r9
+; BE-PWR8-NEXT: stxvd2x vs2, r5, r3
+; BE-PWR8-NEXT: stxvd2x vs1, r4, r7
+; BE-PWR8-NEXT: stxvd2x vs0, r4, r6
+; BE-PWR8-NEXT: stxvd2x vs3, r4, r9
+; BE-PWR8-NEXT: blr
entry:
%arrayidx = getelementptr inbounds <512 x i1>, <512 x i1>* @f, i64 %SrcIdx
%0 = load <512 x i1>, <512 x i1>* %arrayidx, align 64
@@ -112,6 +274,94 @@ define dso_local void @testUnalignedLdSt() {
; BE-PAIRED-NEXT: pstxv vs3, 67(r3), 0
; BE-PAIRED-NEXT: pstxv vs2, 51(r3), 0
; BE-PAIRED-NEXT: blr
+;
+; LE-PWR9-LABEL: testUnalignedLdSt:
+; LE-PWR9: # %bb.0: # %entry
+; LE-PWR9-NEXT: addis r3, r2, f@toc@ha
+; LE-PWR9-NEXT: li r4, 11
+; LE-PWR9-NEXT: addi r3, r3, f@toc@l
+; LE-PWR9-NEXT: lxvx vs0, r3, r4
+; LE-PWR9-NEXT: li r4, 27
+; LE-PWR9-NEXT: lxvx vs1, r3, r4
+; LE-PWR9-NEXT: li r4, 43
+; LE-PWR9-NEXT: lxvx vs2, r3, r4
+; LE-PWR9-NEXT: li r4, 59
+; LE-PWR9-NEXT: lxvx vs3, r3, r4
+; LE-PWR9-NEXT: li r4, 67
+; LE-PWR9-NEXT: stxvx vs3, r3, r4
+; LE-PWR9-NEXT: li r4, 51
+; LE-PWR9-NEXT: stxvx vs2, r3, r4
+; LE-PWR9-NEXT: li r4, 35
+; LE-PWR9-NEXT: stxvx vs1, r3, r4
+; LE-PWR9-NEXT: li r4, 19
+; LE-PWR9-NEXT: stxvx vs0, r3, r4
+; LE-PWR9-NEXT: blr
+;
+; LE-PWR8-LABEL: testUnalignedLdSt:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: addis r3, r2, f@toc@ha
+; LE-PWR8-NEXT: li r4, 59
+; LE-PWR8-NEXT: li r5, 43
+; LE-PWR8-NEXT: addi r3, r3, f@toc@l
+; LE-PWR8-NEXT: lxvd2x vs0, r3, r4
+; LE-PWR8-NEXT: li r4, 11
+; LE-PWR8-NEXT: lxvd2x vs1, r3, r5
+; LE-PWR8-NEXT: li r5, 27
+; LE-PWR8-NEXT: lxvd2x vs2, r3, r4
+; LE-PWR8-NEXT: lxvd2x vs3, r3, r5
+; LE-PWR8-NEXT: li r4, 51
+; LE-PWR8-NEXT: li r5, 67
+; LE-PWR8-NEXT: stxvd2x vs1, r3, r4
+; LE-PWR8-NEXT: li r4, 35
+; LE-PWR8-NEXT: stxvd2x vs0, r3, r5
+; LE-PWR8-NEXT: li r5, 19
+; LE-PWR8-NEXT: stxvd2x vs3, r3, r4
+; LE-PWR8-NEXT: stxvd2x vs2, r3, r5
+; LE-PWR8-NEXT: blr
+;
+; BE-PWR9-LABEL: testUnalignedLdSt:
+; BE-PWR9: # %bb.0: # %entry
+; BE-PWR9-NEXT: addis r3, r2, f@toc@ha
+; BE-PWR9-NEXT: li r4, 11
+; BE-PWR9-NEXT: addi r3, r3, f@toc@l
+; BE-PWR9-NEXT: lxvx vs0, r3, r4
+; BE-PWR9-NEXT: li r4, 27
+; BE-PWR9-NEXT: lxvx vs1, r3, r4
+; BE-PWR9-NEXT: li r4, 43
+; BE-PWR9-NEXT: lxvx vs2, r3, r4
+; BE-PWR9-NEXT: li r4, 59
+; BE-PWR9-NEXT: lxvx vs3, r3, r4
+; BE-PWR9-NEXT: li r4, 67
+; BE-PWR9-NEXT: stxvx vs3, r3, r4
+; BE-PWR9-NEXT: li r4, 51
+; BE-PWR9-NEXT: stxvx vs2, r3, r4
+; BE-PWR9-NEXT: li r4, 35
+; BE-PWR9-NEXT: stxvx vs1, r3, r4
+; BE-PWR9-NEXT: li r4, 19
+; BE-PWR9-NEXT: stxvx vs0, r3, r4
+; BE-PWR9-NEXT: blr
+;
+; BE-PWR8-LABEL: testUnalignedLdSt:
+; BE-PWR8: # %bb.0: # %entry
+; BE-PWR8-NEXT: addis r3, r2, f@toc@ha
+; BE-PWR8-NEXT: li r4, 43
+; BE-PWR8-NEXT: li r5, 59
+; BE-PWR8-NEXT: addi r3, r3, f@toc@l
+; BE-PWR8-NEXT: lxvd2x vs0, r3, r4
+; BE-PWR8-NEXT: li r4, 11
+; BE-PWR8-NEXT: lxvd2x vs1, r3, r5
+; BE-PWR8-NEXT: li r5, 27
+; BE-PWR8-NEXT: lxvd2x vs2, r3, r4
+; BE-PWR8-NEXT: lxvd2x vs3, r3, r5
+; BE-PWR8-NEXT: li r4, 67
+; BE-PWR8-NEXT: li r5, 51
+; BE-PWR8-NEXT: stxvd2x vs1, r3, r4
+; BE-PWR8-NEXT: li r4, 35
+; BE-PWR8-NEXT: stxvd2x vs0, r3, r5
+; BE-PWR8-NEXT: li r5, 19
+; BE-PWR8-NEXT: stxvd2x vs3, r3, r4
+; BE-PWR8-NEXT: stxvd2x vs2, r3, r5
+; BE-PWR8-NEXT: blr
entry:
%0 = bitcast <512 x i1>* @f to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 11
@@ -141,6 +391,54 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: stxv v3, 80(r3)
; BE-PAIRED-NEXT: stxv v2, 64(r3)
; BE-PAIRED-NEXT: blr
+;
+; LE-PWR9-LABEL: testLdStPair:
+; LE-PWR9: # %bb.0: # %entry
+; LE-PWR9-NEXT: addis r3, r2, g@toc@ha
+; LE-PWR9-NEXT: addi r3, r3, g@toc@l
+; LE-PWR9-NEXT: lxv vs0, 32(r3)
+; LE-PWR9-NEXT: lxv vs1, 48(r3)
+; LE-PWR9-NEXT: stxv vs1, 80(r3)
+; LE-PWR9-NEXT: stxv vs0, 64(r3)
+; LE-PWR9-NEXT: blr
+;
+; LE-PWR8-LABEL: testLdStPair:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: addis r3, r2, g@toc@ha
+; LE-PWR8-NEXT: li r4, 32
+; LE-PWR8-NEXT: li r5, 48
+; LE-PWR8-NEXT: addi r3, r3, g@toc@l
+; LE-PWR8-NEXT: lxvd2x vs0, r3, r4
+; LE-PWR8-NEXT: lxvd2x vs1, r3, r5
+; LE-PWR8-NEXT: li r4, 80
+; LE-PWR8-NEXT: li r5, 64
+; LE-PWR8-NEXT: stxvd2x vs1, r3, r4
+; LE-PWR8-NEXT: stxvd2x vs0, r3, r5
+; LE-PWR8-NEXT: blr
+;
+; BE-PWR9-LABEL: testLdStPair:
+; BE-PWR9: # %bb.0: # %entry
+; BE-PWR9-NEXT: addis r3, r2, g@toc@ha
+; BE-PWR9-NEXT: addi r3, r3, g@toc@l
+; BE-PWR9-NEXT: lxv vs0, 32(r3)
+; BE-PWR9-NEXT: lxv vs1, 48(r3)
+; BE-PWR9-NEXT: stxv vs1, 80(r3)
+; BE-PWR9-NEXT: stxv vs0, 64(r3)
+; BE-PWR9-NEXT: blr
+;
+; BE-PWR8-LABEL: testLdStPair:
+; BE-PWR8: # %bb.0: # %entry
+; BE-PWR8-NEXT: addis r3, r2, g@toc@ha
+; BE-PWR8-NEXT: li r4, 32
+; BE-PWR8-NEXT: li r5, 48
+; BE-PWR8-NEXT: addi r3, r3, g@toc@l
+; BE-PWR8-NEXT: lxvd2x vs0, r3, r4
+; BE-PWR8-NEXT: lxvd2x vs1, r3, r5
+; BE-PWR8-NEXT: li r4, 80
+; BE-PWR8-NEXT: li r5, 64
+; BE-PWR8-NEXT: stxvd2x vs1, r3, r4
+; BE-PWR8-NEXT: stxvd2x vs0, r3, r5
+; BE-PWR8-NEXT: blr
entry:
%arrayidx = getelementptr inbounds <256 x i1>, <256 x i1>* @g, i64 1
%0 = load <256 x i1>, <256 x i1>* %arrayidx, align 64
@@ -176,6 +474,64 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: stxvx v2, r5, r3
; BE-PAIRED-NEXT: stxv v3, 16(r4)
; BE-PAIRED-NEXT: blr
+;
+; LE-PWR9-LABEL: testXLdStPair:
+; LE-PWR9: # %bb.0: # %entry
+; LE-PWR9-NEXT: addis r5, r2, g@toc@ha
+; LE-PWR9-NEXT: sldi r3, r3, 5
+; LE-PWR9-NEXT: sldi r4, r4, 5
+; LE-PWR9-NEXT: addi r5, r5, g@toc@l
+; LE-PWR9-NEXT: add r6, r5, r3
+; LE-PWR9-NEXT: lxvx vs1, r5, r3
+; LE-PWR9-NEXT: lxv vs0, 16(r6)
+; LE-PWR9-NEXT: add r6, r5, r4
+; LE-PWR9-NEXT: stxvx vs1, r5, r4
+; LE-PWR9-NEXT: stxv vs0, 16(r6)
+; LE-PWR9-NEXT: blr
+;
+; LE-PWR8-LABEL: testXLdStPair:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: addis r5, r2, g@toc@ha
+; LE-PWR8-NEXT: sldi r3, r3, 5
+; LE-PWR8-NEXT: li r7, 16
+; LE-PWR8-NEXT: addi r5, r5, g@toc@l
+; LE-PWR8-NEXT: add r6, r5, r3
+; LE-PWR8-NEXT: lxvd2x vs1, r5, r3
+; LE-PWR8-NEXT: sldi r3, r4, 5
+; LE-PWR8-NEXT: lxvd2x vs0, r6, r7
+; LE-PWR8-NEXT: add r4, r5, r3
+; LE-PWR8-NEXT: stxvd2x vs1, r5, r3
+; LE-PWR8-NEXT: stxvd2x vs0, r4, r7
+; LE-PWR8-NEXT: blr
+;
+; BE-PWR9-LABEL: testXLdStPair:
+; BE-PWR9: # %bb.0: # %entry
+; BE-PWR9-NEXT: addis r5, r2, g@toc@ha
+; BE-PWR9-NEXT: sldi r3, r3, 5
+; BE-PWR9-NEXT: sldi r4, r4, 5
+; BE-PWR9-NEXT: addi r5, r5, g@toc@l
+; BE-PWR9-NEXT: add r6, r5, r3
+; BE-PWR9-NEXT: lxvx vs1, r5, r3
+; BE-PWR9-NEXT: lxv vs0, 16(r6)
+; BE-PWR9-NEXT: add r6, r5, r4
+; BE-PWR9-NEXT: stxvx vs1, r5, r4
+; BE-PWR9-NEXT: stxv vs0, 16(r6)
+; BE-PWR9-NEXT: blr
+;
+; BE-PWR8-LABEL: testXLdStPair:
+; BE-PWR8: # %bb.0: # %entry
+; BE-PWR8-NEXT: addis r5, r2, g@toc@ha
+; BE-PWR8-NEXT: sldi r3, r3, 5
+; BE-PWR8-NEXT: li r7, 16
+; BE-PWR8-NEXT: addi r5, r5, g@toc@l
+; BE-PWR8-NEXT: add r6, r5, r3
+; BE-PWR8-NEXT: lxvd2x vs0, r5, r3
+; BE-PWR8-NEXT: sldi r3, r4, 5
+; BE-PWR8-NEXT: lxvd2x vs1, r6, r7
+; BE-PWR8-NEXT: add r4, r5, r3
+; BE-PWR8-NEXT: stxvd2x vs0, r5, r3
+; BE-PWR8-NEXT: stxvd2x vs1, r4, r7
+; BE-PWR8-NEXT: blr
entry:
%arrayidx = getelementptr inbounds <256 x i1>, <256 x i1>* @g, i64 %SrcIdx
%0 = load <256 x i1>, <256 x i1>* %arrayidx, align 64
@@ -202,6 +558,74 @@ define dso_local void @testUnalignedLdStPair() {
; BE-PAIRED-NEXT: pstxv v3, 35(r3), 0
; BE-PAIRED-NEXT: pstxv v2, 19(r3), 0
; BE-PAIRED-NEXT: blr
+;
+; LE-PWR9-LABEL: testUnalignedLdStPair:
+; LE-PWR9: # %bb.0: # %entry
+; LE-PWR9-NEXT: addis r3, r2, g@toc@ha
+; LE-PWR9-NEXT: li r6, 19
+; LE-PWR9-NEXT: li r4, 11
+; LE-PWR9-NEXT: li r5, 35
+; LE-PWR9-NEXT: li r7, 27
+; LE-PWR9-NEXT: addi r3, r3, g@toc@l
+; LE-PWR9-NEXT: lxvx vs0, r3, r6
+; LE-PWR9-NEXT: ldx r4, r3, r4
+; LE-PWR9-NEXT: ldx r5, r3, r5
+; LE-PWR9-NEXT: stdx r4, r3, r6
+; LE-PWR9-NEXT: stxvx vs0, r3, r7
+; LE-PWR9-NEXT: li r7, 43
+; LE-PWR9-NEXT: stdx r5, r3, r7
+; LE-PWR9-NEXT: blr
+;
+; LE-PWR8-LABEL: testUnalignedLdStPair:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: addis r3, r2, g@toc@ha
+; LE-PWR8-NEXT: li r4, 19
+; LE-PWR8-NEXT: li r5, 11
+; LE-PWR8-NEXT: li r6, 35
+; LE-PWR8-NEXT: li r7, 43
+; LE-PWR8-NEXT: li r8, 27
+; LE-PWR8-NEXT: addi r3, r3, g@toc@l
+; LE-PWR8-NEXT: lxvd2x vs0, r3, r4
+; LE-PWR8-NEXT: ldx r5, r3, r5
+; LE-PWR8-NEXT: ldx r6, r3, r6
+; LE-PWR8-NEXT: stdx r6, r3, r7
+; LE-PWR8-NEXT: stdx r5, r3, r4
+; LE-PWR8-NEXT: stxvd2x vs0, r3, r8
+; LE-PWR8-NEXT: blr
+;
+; BE-PWR9-LABEL: testUnalignedLdStPair:
+; BE-PWR9: # %bb.0: # %entry
+; BE-PWR9-NEXT: addis r3, r2, g@toc@ha
+; BE-PWR9-NEXT: li r6, 19
+; BE-PWR9-NEXT: li r4, 11
+; BE-PWR9-NEXT: li r5, 35
+; BE-PWR9-NEXT: li r7, 27
+; BE-PWR9-NEXT: addi r3, r3, g@toc@l
+; BE-PWR9-NEXT: lxvx vs0, r3, r6
+; BE-PWR9-NEXT: ldx r4, r3, r4
+; BE-PWR9-NEXT: ldx r5, r3, r5
+; BE-PWR9-NEXT: stdx r4, r3, r6
+; BE-PWR9-NEXT: stxvx vs0, r3, r7
+; BE-PWR9-NEXT: li r7, 43
+; BE-PWR9-NEXT: stdx r5, r3, r7
+; BE-PWR9-NEXT: blr
+;
+; BE-PWR8-LABEL: testUnalignedLdStPair:
+; BE-PWR8: # %bb.0: # %entry
+; BE-PWR8-NEXT: addis r3, r2, g@toc@ha
+; BE-PWR8-NEXT: li r4, 19
+; BE-PWR8-NEXT: li r5, 11
+; BE-PWR8-NEXT: li r6, 35
+; BE-PWR8-NEXT: li r7, 27
+; BE-PWR8-NEXT: addi r3, r3, g@toc@l
+; BE-PWR8-NEXT: lxvd2x vs0, r3, r4
+; BE-PWR8-NEXT: ldx r5, r3, r5
+; BE-PWR8-NEXT: ldx r6, r3, r6
+; BE-PWR8-NEXT: stxvd2x vs0, r3, r7
+; BE-PWR8-NEXT: li r7, 43
+; BE-PWR8-NEXT: stdx r5, r3, r4
+; BE-PWR8-NEXT: stdx r6, r3, r7
+; BE-PWR8-NEXT: blr
entry:
%0 = bitcast <256 x i1>* @g to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 11
More information about the cfe-commits
mailing list