[clang] [PowerPC][NFC] autogen mma tc checks via update_cc_test_checks (PR #108584)
Lei Huang via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 13 08:58:30 PDT 2024
https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/108584
>From 70077d359e2f98a3c4b24fcf638a51c6a0272473 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Fri, 13 Sep 2024 15:00:19 +0000
Subject: [PATCH 1/2] [PowerPC][NFC] autogen mma tc checks via
llvm/utils/update_cc_test_checks.py
Checks for clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c
seem to have been manually updated to rename temp variables even
though the file says the checks were auto-generated. Regenerate via script in
prep for changes needed in a followup patch.
---
.../PowerPC/builtins-ppc-pair-mma-types.c | 204 +++++++++---------
1 file changed, 102 insertions(+), 102 deletions(-)
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c
index a414a2827b2c43..39c040967dc0c3 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c
@@ -16,18 +16,18 @@
// CHECK-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: store ptr [[TMP0]], ptr [[VQP]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VQP]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, ptr [[TMP2]], align 64
-// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[VQ1]], align 64
-// CHECK-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
-// CHECK-NEXT: store <512 x i1> [[TMP4]], ptr [[VQ2]], align 64
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VQP]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[VQ1]], align 64
+// CHECK-NEXT: [[TMP3:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[VQ2]], align 64
+// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]])
-// CHECK-NEXT: store <512 x i1> [[TMP7]], ptr [[VQ3]], align 64
-// CHECK-NEXT: [[TMP8:%.*]] = load <512 x i1>, ptr [[VQ3]], align 64
-// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VQP]], align 8
-// CHECK-NEXT: store <512 x i1> [[TMP8]], ptr [[TMP9]], align 64
+// CHECK-NEXT: [[TMP6:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
+// CHECK-NEXT: store <512 x i1> [[TMP6]], ptr [[VQ3]], align 64
+// CHECK-NEXT: [[TMP7:%.*]] = load <512 x i1>, ptr [[VQ3]], align 64
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VQP]], align 8
+// CHECK-NEXT: store <512 x i1> [[TMP7]], ptr [[TMP8]], align 64
// CHECK-NEXT: ret void
//
// CHECK-BE-LABEL: @testVQLocal(
@@ -42,18 +42,18 @@
// CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-BE-NEXT: store ptr [[TMP0]], ptr [[VQP]], align 8
-// CHECK-BE-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VQP]], align 8
-// CHECK-BE-NEXT: [[TMP3:%.*]] = load <512 x i1>, ptr [[TMP2]], align 64
-// CHECK-BE-NEXT: store <512 x i1> [[TMP3]], ptr [[VQ1]], align 64
-// CHECK-BE-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
-// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], ptr [[VQ2]], align 64
+// CHECK-BE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VQP]], align 8
+// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64
+// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], ptr [[VQ1]], align 64
+// CHECK-BE-NEXT: [[TMP3:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+// CHECK-BE-NEXT: store <512 x i1> [[TMP3]], ptr [[VQ2]], align 64
+// CHECK-BE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-BE-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-BE-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]])
-// CHECK-BE-NEXT: store <512 x i1> [[TMP7]], ptr [[VQ3]], align 64
-// CHECK-BE-NEXT: [[TMP8:%.*]] = load <512 x i1>, ptr [[VQ3]], align 64
-// CHECK-BE-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VQP]], align 8
-// CHECK-BE-NEXT: store <512 x i1> [[TMP8]], ptr [[TMP9]], align 64
+// CHECK-BE-NEXT: [[TMP6:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
+// CHECK-BE-NEXT: store <512 x i1> [[TMP6]], ptr [[VQ3]], align 64
+// CHECK-BE-NEXT: [[TMP7:%.*]] = load <512 x i1>, ptr [[VQ3]], align 64
+// CHECK-BE-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VQP]], align 8
+// CHECK-BE-NEXT: store <512 x i1> [[TMP7]], ptr [[TMP8]], align 64
// CHECK-BE-NEXT: ret void
//
void testVQLocal(int *ptr, vector unsigned char vc) {
@@ -79,24 +79,24 @@ void testVQLocal(int *ptr, vector unsigned char vc) {
// CHECK-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: store ptr [[TMP0]], ptr [[VPP]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
-// CHECK-NEXT: store <256 x i1> [[TMP3]], ptr [[VP1]], align 32
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VPP]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>, ptr [[TMP1]], align 32
+// CHECK-NEXT: store <256 x i1> [[TMP2]], ptr [[VP1]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-// CHECK-NEXT: store <256 x i1> [[TMP6]], ptr [[VP2]], align 64
+// CHECK-NEXT: [[TMP5:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]])
+// CHECK-NEXT: store <256 x i1> [[TMP5]], ptr [[VP2]], align 64
+// CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP8]], <16 x i8> [[TMP7]])
-// CHECK-NEXT: store <256 x i1> [[TMP9]], ptr [[VP2]], align 64
-// CHECK-NEXT: [[TMP10:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
-// CHECK-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]])
-// CHECK-NEXT: store <512 x i1> [[TMP12]], ptr [[VQ]], align 64
-// CHECK-NEXT: [[TMP13:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
-// CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[VPP]], align 8
-// CHECK-NEXT: store <256 x i1> [[TMP13]], ptr [[TMP14]], align 32
+// CHECK-NEXT: [[TMP8:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP7]], <16 x i8> [[TMP6]])
+// CHECK-NEXT: store <256 x i1> [[TMP8]], ptr [[VP2]], align 64
+// CHECK-NEXT: [[TMP9:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
+// CHECK-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
+// CHECK-NEXT: [[TMP11:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP9]], <16 x i8> [[TMP10]])
+// CHECK-NEXT: store <512 x i1> [[TMP11]], ptr [[VQ]], align 64
+// CHECK-NEXT: [[TMP12:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
+// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[VPP]], align 8
+// CHECK-NEXT: store <256 x i1> [[TMP12]], ptr [[TMP13]], align 32
// CHECK-NEXT: ret void
//
// CHECK-BE-LABEL: @testVPLocal(
@@ -112,24 +112,24 @@ void testVQLocal(int *ptr, vector unsigned char vc) {
// CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-BE-NEXT: store ptr [[TMP0]], ptr [[VPP]], align 8
-// CHECK-BE-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP]], align 8
-// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
-// CHECK-BE-NEXT: store <256 x i1> [[TMP3]], ptr [[VP1]], align 32
+// CHECK-BE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VPP]], align 8
+// CHECK-BE-NEXT: [[TMP2:%.*]] = load <256 x i1>, ptr [[TMP1]], align 32
+// CHECK-BE-NEXT: store <256 x i1> [[TMP2]], ptr [[VP1]], align 32
+// CHECK-BE-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-BE-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-// CHECK-BE-NEXT: store <256 x i1> [[TMP6]], ptr [[VP2]], align 64
+// CHECK-BE-NEXT: [[TMP5:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]])
+// CHECK-BE-NEXT: store <256 x i1> [[TMP5]], ptr [[VP2]], align 64
+// CHECK-BE-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-BE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-BE-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
-// CHECK-BE-NEXT: store <256 x i1> [[TMP9]], ptr [[VP2]], align 64
-// CHECK-BE-NEXT: [[TMP10:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
-// CHECK-BE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
-// CHECK-BE-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]])
-// CHECK-BE-NEXT: store <512 x i1> [[TMP12]], ptr [[VQ]], align 64
-// CHECK-BE-NEXT: [[TMP13:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
-// CHECK-BE-NEXT: [[TMP14:%.*]] = load ptr, ptr [[VPP]], align 8
-// CHECK-BE-NEXT: store <256 x i1> [[TMP13]], ptr [[TMP14]], align 32
+// CHECK-BE-NEXT: [[TMP8:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP6]], <16 x i8> [[TMP7]])
+// CHECK-BE-NEXT: store <256 x i1> [[TMP8]], ptr [[VP2]], align 64
+// CHECK-BE-NEXT: [[TMP9:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
+// CHECK-BE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
+// CHECK-BE-NEXT: [[TMP11:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP9]], <16 x i8> [[TMP10]])
+// CHECK-BE-NEXT: store <512 x i1> [[TMP11]], ptr [[VQ]], align 64
+// CHECK-BE-NEXT: [[TMP12:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
+// CHECK-BE-NEXT: [[TMP13:%.*]] = load ptr, ptr [[VPP]], align 8
+// CHECK-BE-NEXT: store <256 x i1> [[TMP12]], ptr [[TMP13]], align 32
// CHECK-BE-NEXT: ret void
//
void testVPLocal(int *ptr, vector unsigned char vc) {
@@ -154,18 +154,18 @@ void testVPLocal(int *ptr, vector unsigned char vc) {
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ACC_ADDR]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
-// CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
-// CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
-// CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 16
-// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
-// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
-// CHECK-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
+// CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
+// CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP7]], align 16
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
+// CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP9]], align 16
+// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
+// CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP11]], align 16
// CHECK-NEXT: ret void
//
// CHECK-BE-LABEL: @testRestrictQualifiedPointer2(
@@ -178,18 +178,18 @@ void testVPLocal(int *ptr, vector unsigned char vc) {
// CHECK-BE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ACC_ADDR]], align 8
// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64
// CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
-// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
-// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
-// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16
-// CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
-// CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
-// CHECK-BE-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16
-// CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
-// CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
-// CHECK-BE-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 16
-// CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
-// CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
-// CHECK-BE-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16
+// CHECK-BE-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
+// CHECK-BE-NEXT: [[TMP5:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
+// CHECK-BE-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 16
+// CHECK-BE-NEXT: [[TMP6:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
+// CHECK-BE-NEXT: [[TMP7:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
+// CHECK-BE-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP7]], align 16
+// CHECK-BE-NEXT: [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
+// CHECK-BE-NEXT: [[TMP9:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
+// CHECK-BE-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP9]], align 16
+// CHECK-BE-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
+// CHECK-BE-NEXT: [[TMP11:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
+// CHECK-BE-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP11]], align 16
// CHECK-BE-NEXT: ret void
//
void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) {
@@ -207,18 +207,18 @@ void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) {
// CHECK-NEXT: [[TMP1:%.*]] = load volatile ptr, ptr [[ACC_ADDR]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
-// CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
-// CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
-// CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 16
-// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
-// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
-// CHECK-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
+// CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
+// CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP7]], align 16
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
+// CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP9]], align 16
+// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
+// CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP11]], align 16
// CHECK-NEXT: ret void
//
// CHECK-BE-LABEL: @testVolatileQualifiedPointer2(
@@ -231,18 +231,18 @@ void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) {
// CHECK-BE-NEXT: [[TMP1:%.*]] = load volatile ptr, ptr [[ACC_ADDR]], align 8
// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64
// CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
-// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
-// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
-// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16
-// CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
-// CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
-// CHECK-BE-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16
-// CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
-// CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
-// CHECK-BE-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 16
-// CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
-// CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
-// CHECK-BE-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16
+// CHECK-BE-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
+// CHECK-BE-NEXT: [[TMP5:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
+// CHECK-BE-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 16
+// CHECK-BE-NEXT: [[TMP6:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
+// CHECK-BE-NEXT: [[TMP7:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
+// CHECK-BE-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP7]], align 16
+// CHECK-BE-NEXT: [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
+// CHECK-BE-NEXT: [[TMP9:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
+// CHECK-BE-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP9]], align 16
+// CHECK-BE-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
+// CHECK-BE-NEXT: [[TMP11:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
+// CHECK-BE-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP11]], align 16
// CHECK-BE-NEXT: ret void
//
void testVolatileQualifiedPointer2(__vector_quad *__volatile acc) {
>From 1856d901bbb03e0985760f71dda0d79d9148b80a Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Fri, 13 Sep 2024 16:09:33 +0000
Subject: [PATCH 2/2] add non-opt checks for mma build pair test
---
.../PowerPC/builtins-ppc-build-pair-mma.c | 68 +++++++++++++++++++
1 file changed, 68 insertions(+)
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
index 471a31a8c5eac2..8a2bc93dd6cd0a 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
@@ -3,6 +3,8 @@
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE
// RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-BE
+// RUN: %clang_cc1 -O0 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
+// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE-NOOPT
// CHECK-LE-LABEL: @test1(
// CHECK-LE-NEXT: entry:
@@ -16,6 +18,42 @@
// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]]
// CHECK-BE-NEXT: ret void
//
+// CHECK-LE-NOOPT-LABEL: @test1(
+// CHECK-LE-NOOPT-NEXT: entry:
+// CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
+// CHECK-LE-NOOPT-NEXT: [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16
+// CHECK-LE-NOOPT-NEXT: [[VC3_ADDR:%.*]] = alloca <16 x i8>, align 16
+// CHECK-LE-NOOPT-NEXT: [[VC4_ADDR:%.*]] = alloca <16 x i8>, align 16
+// CHECK-LE-NOOPT-NEXT: [[RESP_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
+// CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32
+// CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <512 x i1>, align 64
+// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC3:%.*]], ptr [[VC3_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC4:%.*]], ptr [[VC4_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
+// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
+// CHECK-LE-NOOPT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
+// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP3]], ptr [[VP]], align 32
+// CHECK-LE-NOOPT-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC3_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr [[VC4_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: [[TMP8:%.*]] = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[TMP7]], <16 x i8> [[TMP6]], <16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
+// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP8]], ptr [[RES]], align 64
+// CHECK-LE-NOOPT-NEXT: [[TMP9:%.*]] = load <512 x i1>, ptr [[RES]], align 64
+// CHECK-LE-NOOPT-NEXT: [[TMP10:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP9]], ptr [[TMP10]], align 64
+// CHECK-LE-NOOPT-NEXT: ret void
+//
void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vector unsigned char vc2,
vector unsigned char vc3, vector unsigned char vc4, unsigned char *resp) {
__vector_quad vq = *((__vector_quad *)vqp);
@@ -37,6 +75,36 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec
// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
// CHECK-BE-NEXT: ret void
//
+// CHECK-LE-NOOPT-LABEL: @test2(
+// CHECK-LE-NOOPT-NEXT: entry:
+// CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
+// CHECK-LE-NOOPT-NEXT: [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16
+// CHECK-LE-NOOPT-NEXT: [[RESP_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
+// CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32
+// CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <256 x i1>, align 32
+// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
+// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
+// CHECK-LE-NOOPT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
+// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP3]], ptr [[VP]], align 32
+// CHECK-LE-NOOPT-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
+// CHECK-LE-NOOPT-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
+// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP6]], ptr [[RES]], align 64
+// CHECK-LE-NOOPT-NEXT: [[TMP7:%.*]] = load <256 x i1>, ptr [[RES]], align 32
+// CHECK-LE-NOOPT-NEXT: [[TMP8:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
+// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP7]], ptr [[TMP8]], align 32
+// CHECK-LE-NOOPT-NEXT: ret void
+//
void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1,
vector unsigned char vc2, unsigned char *resp) {
__vector_quad vq = *((__vector_quad *)vqp);
More information about the cfe-commits
mailing list